From 4a964c33643a38dfb16937fde3ff2db2f3483008 Mon Sep 17 00:00:00 2001 From: KyungWoon Cho Date: Fri, 7 Sep 2018 14:13:38 +0900 Subject: [PATCH] Drop ptx common header by detecting the end of common line The old implementation, which dropped the first 50 characters before merging PTX code, could insert garbage code with some CUDA versions because their newline character counts differ. CUDA tools 8.0 has this problem. --- libcuda/cuda_runtime_api.cc | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/libcuda/cuda_runtime_api.cc b/libcuda/cuda_runtime_api.cc index 9bdb993a9..d227fec7e 100644 --- a/libcuda/cuda_runtime_api.cc +++ b/libcuda/cuda_runtime_api.cc @@ -1538,6 +1538,27 @@ std::list pruneSectionList(std::list cuobj return prunedList; } +static const char * +findPtxBody(const char *ptxcode) +{ + const char *line_head; + + line_head = ptxcode; + while (line_head) { + const char *line_next; + + line_next = strchr(line_head, '\n'); + if (line_next == NULL) + return NULL; + line_next++; + if (strncmp(line_head, ".address_size", 13) == 0) + return line_next; + line_head = line_next; + } + + return NULL; +} + //! Merge all PTX sections that have a specific identifier into one file std::list mergeMatchingSections(std::list cuobjdumpSectionList, std::string identifier){ const char *ptxcode = ""; @@ -1559,13 +1580,13 @@ std::list mergeMatchingSections(std::list } // Append all the PTX from the last PTX section into the current PTX section - // Add 50 to ptxcode to ignore the information regarding version/target/address_size - if (strlen(ptxcode) >= 50) { + // Find the line after ".address_size" to ignore the information regarding version/target/address_size + ptxcode = findPtxBody(ptxcode); + if (ptxcode != NULL) { FILE *ptxfile = fopen((ptxsection->getPTXfilename()).c_str(), "a"); - fprintf(ptxfile, "%s", ptxcode + 50); + fprintf(ptxfile, "%s", ptxcode); fclose(ptxfile); } - old_iter = iter; old_ptxsection = ptxsection; }