From ece668fca09b2e32fe7900a1778c21d9642892a7 Mon Sep 17 00:00:00 2001
From: Carl Pearson <cwpears@sandia.gov>
Date: Mon, 26 Feb 2024 16:10:33 -0700
Subject: [PATCH 1/7] Add kp_functor_size: print parallel functor sizes

Prints the count, size, and name of functors passed to Kokkos parallel regions.
---
 README.md                                     |   4 +
 .../all/impl/Kokkos_Profiling_C_Interface.h   |  27 +++-
 .../all/impl/Kokkos_Profiling_Interface.hpp   |  42 +++--
 profiling/all/kp_core.hpp                     |   9 ++
 profiling/functor-size/CMakeLists.txt         |   1 +
 profiling/functor-size/Makefile               |  14 ++
 profiling/functor-size/kp_functor_size.cpp    | 143 ++++++++++++++++++
 7 files changed, 223 insertions(+), 17 deletions(-)
 create mode 100644 profiling/functor-size/CMakeLists.txt
 create mode 100644 profiling/functor-size/Makefile
 create mode 100644 profiling/functor-size/kp_functor_size.cpp

diff --git a/README.md b/README.md
index 73f1a902a..a0b8c0d33 100644
--- a/README.md
+++ b/README.md
@@ -60,6 +60,10 @@ The following provides an overview of the tools available in the set of Kokkos T
 
     Prints Kokkos Kernel and Region events during runtime.
 
++ [**Functor Size**](https://github.com/kokkos/kokkos-tools/wiki/FunctorSize)
+
+    Prints information about the size of the functor objects passed to Kokkos parallel regions.
+
 ### 3rd Party Profiling Tool Hooks
 + [**VTuneConnector:**](https://github.com/kokkos/kokkos-tools/wiki/VTuneConnector)
     
diff --git a/profiling/all/impl/Kokkos_Profiling_C_Interface.h b/profiling/all/impl/Kokkos_Profiling_C_Interface.h
index c4aa9cce3..106c52811 100644
--- a/profiling/all/impl/Kokkos_Profiling_C_Interface.h
+++ b/profiling/all/impl/Kokkos_Profiling_C_Interface.h
@@ -1,3 +1,4 @@
+/*
 //@HEADER
 // ************************************************************************
 //
@@ -9,10 +10,11 @@
 // the U.S. Government retains certain rights in this software.
 //
 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
+//
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //@HEADER
+*/
 
 #ifndef KOKKOS_PROFILING_C_INTERFACE_HPP
 #define KOKKOS_PROFILING_C_INTERFACE_HPP
@@ -26,7 +28,7 @@
 #include <stdbool.h>
 #endif
 
-#define KOKKOSP_INTERFACE_VERSION 20210623
+#define KOKKOSP_INTERFACE_VERSION 20211015
 
 // Profiling
 
@@ -38,6 +40,19 @@ struct Kokkos_Profiling_SpaceHandle {
   char name[64];
 };
 
+const static int Kokkos_Profiling_Kernel_Static_Info_Size = 512;
+
+// uses a union to ensure the struct is as large as the target size
+struct Kokkos_Profiling_Kernel_Static_Info {
+  union {
+    struct {
+      uint64_t functor_size;  // sizeof the functor
+    };
+
+    char padding[Kokkos_Profiling_Kernel_Static_Info_Size];
+  };
+};
+
 // NOLINTNEXTLINE(modernize-use-using): C compatibility
 typedef void (*Kokkos_Profiling_initFunction)(
     const int, const uint64_t, const uint32_t,
@@ -54,6 +69,10 @@ typedef void (*Kokkos_Profiling_beginFunction)(const char*, const uint32_t,
 // NOLINTNEXTLINE(modernize-use-using): C compatibility
 typedef void (*Kokkos_Profiling_endFunction)(uint64_t);
 
+// NOLINTNEXTLINE(modernize-use-using): C compatibility
+typedef void (*Kokkos_Profiling_markKernelStaticInfoFunction)(
+    uint64_t, const struct Kokkos_Profiling_Kernel_Static_Info*);
+
 // NOLINTNEXTLINE(modernize-use-using): C compatibility
 typedef void (*Kokkos_Profiling_pushFunction)(const char*);
 // NOLINTNEXTLINE(modernize-use-using): C compatibility
@@ -247,6 +266,7 @@ struct Kokkos_Profiling_EventSet {
   Kokkos_Profiling_dualViewSyncFunction sync_dual_view;
   Kokkos_Profiling_dualViewModifyFunction modify_dual_view;
   Kokkos_Profiling_declareMetadataFunction declare_metadata;
+  Kokkos_Profiling_markKernelStaticInfoFunction mark_kernel_static_info;
   Kokkos_Tools_provideToolProgrammingInterfaceFunction
       provide_tool_programming_interface;
   Kokkos_Tools_requestToolSettingsFunction request_tool_settings;
@@ -257,7 +277,8 @@ struct Kokkos_Profiling_EventSet {
   Kokkos_Tools_contextBeginFunction begin_tuning_context;
   Kokkos_Tools_contextEndFunction end_tuning_context;
   Kokkos_Tools_optimizationGoalDeclarationFunction declare_optimization_goal;
-  char padding[232 *
+
+  char padding[231 *
                sizeof(
                    Kokkos_Tools_functionPointer)];  // allows us to add another
                                                     // 256 events to the Tools
diff --git a/profiling/all/impl/Kokkos_Profiling_Interface.hpp b/profiling/all/impl/Kokkos_Profiling_Interface.hpp
index 82ba15d19..d9bfd7145 100644
--- a/profiling/all/impl/Kokkos_Profiling_Interface.hpp
+++ b/profiling/all/impl/Kokkos_Profiling_Interface.hpp
@@ -19,6 +19,7 @@
 
 #include <cinttypes>
 #include <cstddef>
+#include <climits>
 
 #include <cstdlib>
 
@@ -45,6 +46,7 @@ enum struct DeviceType {
   HPX,
   Threads,
   SYCL,
+  OpenACC,
   Unknown
 };
 
@@ -53,6 +55,12 @@ struct ExecutionSpaceIdentifier {
   uint32_t device_id;
   uint32_t instance_id;
 };
+
+constexpr const uint32_t num_type_bits     = 8;
+constexpr const uint32_t num_device_bits   = 7;
+constexpr const uint32_t num_instance_bits = 17;
+constexpr const uint32_t num_avail_bits    = sizeof(uint32_t) * CHAR_BIT;
+
 inline DeviceType devicetype_from_uint32t(const uint32_t in) {
   switch (in) {
     case 0: return DeviceType::Serial;
@@ -63,37 +71,35 @@ inline DeviceType devicetype_from_uint32t(const uint32_t in) {
     case 5: return DeviceType::HPX;
     case 6: return DeviceType::Threads;
     case 7: return DeviceType::SYCL;
+    case 8: return DeviceType::OpenACC;
     default: return DeviceType::Unknown;  // TODO: error out?
   }
 }
 
 inline ExecutionSpaceIdentifier identifier_from_devid(const uint32_t in) {
-  // ExecutionSpaceIdentifier out;
-  // out.type = in >> 24;
-  // out.device_id = in >> 17;
-  // out.instance_id = ((uint32_t(-1)) << 17 ) & in;
-  return {devicetype_from_uint32t(in >> 24),
-          (~((uint32_t(-1)) << 24)) & (in >> 17),
-          (~((uint32_t(-1)) << 17)) & in};
+  constexpr const uint32_t shift = num_avail_bits - num_type_bits;
+
+  return {devicetype_from_uint32t(in >> shift), /*First 8 bits*/
+          (~((uint32_t(-1)) << num_device_bits)) &
+              (in >> num_instance_bits),                  /*Next 7 bits */
+          (~((uint32_t(-1)) << num_instance_bits)) & in}; /*Last 17 bits*/
 }
 
 template <typename ExecutionSpace>
 struct DeviceTypeTraits;
 
-constexpr const size_t device_type_bits = 8;
-constexpr const size_t instance_bits    = 24;
 template <typename ExecutionSpace>
 constexpr uint32_t device_id_root() {
-  /** uncomment when C++14 is enabled
   constexpr auto device_id =
       static_cast<uint32_t>(DeviceTypeTraits<ExecutionSpace>::id);
-  return (device_id << instance_bits);
-  */
-  return 0;
+  return (device_id << (num_instance_bits + num_device_bits));
 }
 template <typename ExecutionSpace>
 inline uint32_t device_id(ExecutionSpace const& space) noexcept {
-  return device_id_root<ExecutionSpace>() + space.impl_instance_id();
+  return device_id_root<ExecutionSpace>() +
+         (DeviceTypeTraits<ExecutionSpace>::device_id(space)
+          << num_instance_bits) +
+         space.impl_instance_id();
 }
 }  // namespace Experimental
 }  // namespace Tools
@@ -116,6 +122,12 @@ using SpaceHandle = Kokkos_Profiling_SpaceHandle;
 
 namespace Tools {
 
+using KernelStaticInfo = Kokkos_Profiling_Kernel_Static_Info;
+
+static_assert(sizeof(KernelStaticInfo) == 512,
+              "Internal kokkos developer error. Please report this error, and "
+              "provide information about your compiler and target platform.");
+
 namespace Experimental {
 using EventSet = Kokkos_Profiling_EventSet;
 static_assert(sizeof(EventSet) / sizeof(Kokkos_Tools_functionPointer) == 275,
@@ -162,6 +174,8 @@ using endFenceFunction        = Kokkos_Profiling_endFenceFunction;
 using dualViewSyncFunction    = Kokkos_Profiling_dualViewSyncFunction;
 using dualViewModifyFunction  = Kokkos_Profiling_dualViewModifyFunction;
 using declareMetadataFunction = Kokkos_Profiling_declareMetadataFunction;
+using markKernelStaticInfoFunction =
+    Kokkos_Profiling_markKernelStaticInfoFunction;
 
 }  // namespace Tools
 
diff --git a/profiling/all/kp_core.hpp b/profiling/all/kp_core.hpp
index c63db1863..7bf6e7a76 100644
--- a/profiling/all/kp_core.hpp
+++ b/profiling/all/kp_core.hpp
@@ -49,6 +49,7 @@ using Kokkos::Tools::SpaceHandle;
 #define EXPOSE_BEGIN_FENCE(FUNC_NAME)
 #define EXPOSE_END_FENCE(FUNC_NAME)
 #define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME)
+#define EXPOSE_MARK_KERNEL_STATIC_INFO(FUNC_NAME)
 
 #else
 
@@ -197,5 +198,13 @@ using Kokkos::Tools::SpaceHandle;
       const char* name, const void* const ptr, bool is_device) { \
     FUNC_NAME(name, ptr, is_device);                             \
   }
+
+#define EXPOSE_MARK_KERNEL_STATIC_INFO(FUNC_NAME)             \
+  __attribute__((weak)) void kokkosp_mark_kernel_static_info( \
+      const uint64_t kernelID,                                \
+      const Kokkos_Profiling_Kernel_Static_Info info) {       \
+    FUNC_NAME(kernelID, info);                                \
+  }
+
 #endif
 #endif  // KOKKOSTOOLS_KOKKOSINTERFACE_HPP
diff --git a/profiling/functor-size/CMakeLists.txt b/profiling/functor-size/CMakeLists.txt
new file mode 100644
index 000000000..018512a80
--- /dev/null
+++ b/profiling/functor-size/CMakeLists.txt
@@ -0,0 +1 @@
+kp_add_library(kp_functor_size kp_functor_size.cpp)
\ No newline at end of file
diff --git a/profiling/functor-size/Makefile b/profiling/functor-size/Makefile
new file mode 100644
index 000000000..bb3b0c2f8
--- /dev/null
+++ b/profiling/functor-size/Makefile
@@ -0,0 +1,14 @@
+
+
+CXX=g++
+CXXFLAGS=-shared -O3 -fPIC -std=c++17
+
+MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
+
+CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all
+
+kp_functor_size.so: ${MAKEFILE_PATH}kp_functor_size.cpp
+	$(CXX) $(CXXFLAGS) -o $@ $<
+
+clean:
+	rm *.so 
diff --git a/profiling/functor-size/kp_functor_size.cpp b/profiling/functor-size/kp_functor_size.cpp
new file mode 100644
index 000000000..147ee3088
--- /dev/null
+++ b/profiling/functor-size/kp_functor_size.cpp
@@ -0,0 +1,143 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#include <iostream>
+#include <unordered_map>
+#include <string>
+
+#include "kp_core.hpp"
+
+namespace KokkosTools {
+namespace FunctorSize {
+
+bool show_warnings = true;
+#define WARN(x)                                                          \
+  {                                                                      \
+    if (show_warnings) {                                                 \
+      std::cerr << "KokkosP: Functor Size: WARNING: " << x << std::endl; \
+    }                                                                    \
+  }
+
+std::unordered_map<uint64_t, uint64_t> anonCount;  // [size] = count
+std::unordered_map<std::string, std::unordered_map<uint64_t, uint64_t>>
+    nameCounts;  // [name][size] = count
+std::vector<std::string> names;
+uint64_t uniqueID = 0;
+
+void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
+                          const uint32_t /*devInfoCount*/,
+                          Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) {
+  std::cerr << "KokkosP: FunctorSize Library Initialized (sequence is "
+            << loadSeq << ", interface version: " << interfaceVer << std::endl;
+}
+
+void kokkosp_finalize_library() {
+  std::cout << std::endl
+            << "KokkosP: Finalization Functor Size profiling library."
+            << std::endl;
+
+  // since the name could be anything, output it last so people can grep / sed /
+  // cut around more easily
+  for (const auto& [name, counts] : nameCounts) {
+    for (const auto& [size, count] : counts) {
+      std::cout << "size " << size << " count " << count << " name " << name
+                << std::endl;
+    }
+  }
+  for (const auto& [size, count] : anonCount) {
+    std::cout << "size " << size << " count " << count
+              << " name KOKKOSP_FUNCTOR_SIZE_ANONYMOUS_FUNCTION" << std::endl;
+  }
+
+  std::cout << std::endl;
+}
+
+void begin_parallel(const char* name, uint64_t* kID) {
+  *kID = uniqueID++;
+  if (nullptr == name) {
+    WARN("Ignording kernel ID "
+         << *kID << " with null name. Results may be incomplete");
+    return;
+  }
+
+  if (*kID < names.size()) {
+    WARN("set new name \"" << name << "\" for previously-seen kernel ID "
+                           << *kID);
+  } else {
+    names.resize((*kID) + 1);  // may have skipped if name was null previously
+  }
+  names[*kID] = name;
+}
+
+void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/,
+                                uint64_t* kID) {
+  begin_parallel(name, kID);
+}
+
+void kokkosp_begin_parallel_reduce(const char* name, const uint32_t /*devID*/,
+                                   uint64_t* kID) {
+  begin_parallel(name, kID);
+}
+
+void kokkosp_begin_parallel_scan(const char* name, const uint32_t /*devID*/,
+                                 uint64_t* kID) {
+  begin_parallel(name, kID);
+}
+
+void kokkosp_mark_kernel_static_info(
+    const uint64_t kernelID, const Kokkos_Profiling_Kernel_Static_Info* info) {
+  if (!info) {
+    WARN("Kokkos provided null info");
+    return;
+  }
+  const uint64_t size = info->functor_size;
+
+  if (kernelID < names.size()) {
+    const std::string& name = names[kernelID];
+    if (0 == nameCounts.count(name)) {
+      nameCounts[name] = {{size, 0}};
+    }
+    std::unordered_map<uint64_t, uint64_t>& nameCount = nameCounts[name];
+
+    if (0 == nameCount.count(size)) {
+      nameCount[size] = 0;
+    }
+    nameCount[size]++;
+  } else {
+    WARN("never-before seen kernel ID \"" << kernelID << "\".");
+
+    if (0 == anonCount.count(size)) {
+      anonCount[size] = 0;
+    }
+    anonCount[size]++;
+  }
+}
+
+}  // namespace FunctorSize
+}  // namespace KokkosTools
+
+extern "C" {
+
+namespace impl = KokkosTools::FunctorSize;
+
+EXPOSE_INIT(impl::kokkosp_init_library)
+EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
+EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
+EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce)
+EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan)
+EXPOSE_MARK_KERNEL_STATIC_INFO(impl::kokkosp_mark_kernel_static_info)
+
+}  // extern "C"

From 37ce052386c151fc9d35cff6840ba17421a4cf6e Mon Sep 17 00:00:00 2001
From: Carl Pearson <cwpears@sandia.gov>
Date: Wed, 28 Feb 2024 12:42:07 -0700
Subject: [PATCH 2/7] Match Core profiling interface

---
 profiling/all/impl/Kokkos_Profiling_C_Interface.h | 12 +++---------
 profiling/all/impl/Kokkos_Profiling_Interface.hpp |  3 ++-
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/profiling/all/impl/Kokkos_Profiling_C_Interface.h b/profiling/all/impl/Kokkos_Profiling_C_Interface.h
index 106c52811..2c0e0a9a2 100644
--- a/profiling/all/impl/Kokkos_Profiling_C_Interface.h
+++ b/profiling/all/impl/Kokkos_Profiling_C_Interface.h
@@ -40,17 +40,11 @@ struct Kokkos_Profiling_SpaceHandle {
   char name[64];
 };
 
-const static int Kokkos_Profiling_Kernel_Static_Info_Size = 512;
-
-// uses a union to ensure the struct is as large as the target size
+#define KOKKOS_PROFILING_KERNEL_STATIC_INFO_SIZE 512
 struct Kokkos_Profiling_Kernel_Static_Info {
-  union {
-    struct {
-      uint64_t functor_size;  // sizeof the functor
-    };
+  uint64_t functor_size;  // sizeof the functor
 
-    char padding[Kokkos_Profiling_Kernel_Static_Info_Size];
-  };
+  char padding[KOKKOS_PROFILING_KERNEL_STATIC_INFO_SIZE - sizeof(uint64_t)];
 };
 
 // NOLINTNEXTLINE(modernize-use-using): C compatibility
diff --git a/profiling/all/impl/Kokkos_Profiling_Interface.hpp b/profiling/all/impl/Kokkos_Profiling_Interface.hpp
index d9bfd7145..22f8601b1 100644
--- a/profiling/all/impl/Kokkos_Profiling_Interface.hpp
+++ b/profiling/all/impl/Kokkos_Profiling_Interface.hpp
@@ -124,7 +124,8 @@ namespace Tools {
 
 using KernelStaticInfo = Kokkos_Profiling_Kernel_Static_Info;
 
-static_assert(sizeof(KernelStaticInfo) == 512,
+static_assert(sizeof(KernelStaticInfo) ==
+                  KOKKOS_PROFILING_KERNEL_STATIC_INFO_SIZE,
               "Internal kokkos developer error. Please report this error, and "
               "provide information about your compiler and target platform.");
 

From ff0d1c6d01c50aef372a5505f5affbf20ea1709d Mon Sep 17 00:00:00 2001
From: Carl Pearson <cwpears@sandia.gov>
Date: Mon, 11 Mar 2024 09:04:39 -0600
Subject: [PATCH 3/7] add functor-size tool to CMakeLists.txt

---
 CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 45299188b..7056689db 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -147,6 +147,7 @@ add_subdirectory(debugging/kernel-logger)
 
 # Profilers
 if(NOT WIN32)
+  add_subdirectory(profiling/functor-size)
   add_subdirectory(profiling/simple-kernel-timer)
   add_subdirectory(profiling/memory-hwm)
   if(KokkosTools_ENABLE_MPI)

From 5a0be6350872ab3faa807d51ed3a1712ec11792f Mon Sep 17 00:00:00 2001
From: Carl Pearson <cwpears@sandia.gov>
Date: Mon, 11 Mar 2024 09:05:52 -0600
Subject: [PATCH 4/7] functor-size: add missing <vector> header

---
 profiling/functor-size/kp_functor_size.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/profiling/functor-size/kp_functor_size.cpp b/profiling/functor-size/kp_functor_size.cpp
index 147ee3088..fc05b60dc 100644
--- a/profiling/functor-size/kp_functor_size.cpp
+++ b/profiling/functor-size/kp_functor_size.cpp
@@ -17,6 +17,7 @@
 #include <iostream>
 #include <unordered_map>
 #include <string>
+#include <vector>
 
 #include "kp_core.hpp"
 

From 24a8d1f6fb25d0170a641cdeebf8184b4a29a4e9 Mon Sep 17 00:00:00 2001
From: Carl Pearson <cwpears@sandia.gov>
Date: Mon, 11 Mar 2024 09:06:11 -0600
Subject: [PATCH 5/7] functor-size: match signature in kp_core.hpp

---
 profiling/all/kp_core.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/profiling/all/kp_core.hpp b/profiling/all/kp_core.hpp
index 7bf6e7a76..fb1bc24b7 100644
--- a/profiling/all/kp_core.hpp
+++ b/profiling/all/kp_core.hpp
@@ -202,7 +202,7 @@ using Kokkos::Tools::SpaceHandle;
 #define EXPOSE_MARK_KERNEL_STATIC_INFO(FUNC_NAME)             \
   __attribute__((weak)) void kokkosp_mark_kernel_static_info( \
       const uint64_t kernelID,                                \
-      const Kokkos_Profiling_Kernel_Static_Info info) {       \
+      const Kokkos_Profiling_Kernel_Static_Info *info) {      \
     FUNC_NAME(kernelID, info);                                \
   }
 

From ecb8aa19ea031a54baeee6cd6779f6b7b458152f Mon Sep 17 00:00:00 2001
From: Carl Pearson <cwpears@sandia.gov>
Date: Mon, 11 Mar 2024 09:57:34 -0600
Subject: [PATCH 6/7] functor-size: change output format to csv, add
 KOKKOSP_FUNCTOR_SIZE_OUTPUT_CSV_PATH to control output path

---
 profiling/functor-size/kp_functor_size.cpp | 40 +++++++++++++++-------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/profiling/functor-size/kp_functor_size.cpp b/profiling/functor-size/kp_functor_size.cpp
index fc05b60dc..4d9ca2c5c 100644
--- a/profiling/functor-size/kp_functor_size.cpp
+++ b/profiling/functor-size/kp_functor_size.cpp
@@ -14,9 +14,11 @@
 //
 //@HEADER
 
+#include <fstream>
 #include <iostream>
-#include <unordered_map>
 #include <string>
+#include <unordered_map>
+#include <utility>
 #include <vector>
 
 #include "kp_core.hpp"
@@ -31,6 +33,8 @@ bool show_warnings = true;
       std::cerr << "KokkosP: Functor Size: WARNING: " << x << std::endl; \
     }                                                                    \
   }
+#define ERROR(x) \
+  { std::cerr << "KokkosP: Functor Size: ERROR: " << x << std::endl; }
 
 std::unordered_map<uint64_t, uint64_t> anonCount;  // [size] = count
 std::unordered_map<std::string, std::unordered_map<uint64_t, uint64_t>>
@@ -45,25 +49,37 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
             << loadSeq << ", interface version: " << interfaceVer << std::endl;
 }
 
-void kokkosp_finalize_library() {
-  std::cout << std::endl
-            << "KokkosP: Finalization Functor Size profiling library."
-            << std::endl;
+void dump_csv(std::ostream& os, const std::string_view delim = ",") {
+  os << "size" << delim << "count" << delim << "name" << std::endl;
 
-  // since the name could be anything, output it last so people can grep / sed /
-  // cut around more easily
   for (const auto& [name, counts] : nameCounts) {
     for (const auto& [size, count] : counts) {
-      std::cout << "size " << size << " count " << count << " name " << name
-                << std::endl;
+      os << size << delim << count << delim << name << std::endl;
     }
   }
   for (const auto& [size, count] : anonCount) {
-    std::cout << "size " << size << " count " << count
-              << " name KOKKOSP_FUNCTOR_SIZE_ANONYMOUS_FUNCTION" << std::endl;
+    os << size << delim << count << delim
+       << "KOKKOSP_FUNCTOR_SIZE_ANONYMOUS_FUNCTION" << std::endl;
   }
+}
 
-  std::cout << std::endl;
+void kokkosp_finalize_library() {
+  std::cout << std::endl
+            << "KokkosP: Finalization Functor Size profiling library."
+            << std::endl;
+
+  const char* output_csv_path =
+      std::getenv("KOKKOSP_FUNCTOR_SIZE_OUTPUT_CSV_PATH");
+
+  if (output_csv_path && std::string_view(output_csv_path) != "") {
+    std::ofstream os(output_csv_path);
+    if (os) {
+      dump_csv(os);
+    } else {
+      ERROR(output_csv_path << " counldn't be opened");
+    }
+  }
+  dump_csv(std::cout, ",");
 }
 
 void begin_parallel(const char* name, uint64_t* kID) {

From d88baa829b601a2c784a879bee607b5989eae852 Mon Sep 17 00:00:00 2001
From: Carl Pearson <cwpears@sandia.gov>
Date: Mon, 11 Mar 2024 14:56:17 -0600
Subject: [PATCH 7/7] functor-sizes: formatting

---
 profiling/all/kp_core.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/profiling/all/kp_core.hpp b/profiling/all/kp_core.hpp
index fb1bc24b7..17e3f3b41 100644
--- a/profiling/all/kp_core.hpp
+++ b/profiling/all/kp_core.hpp
@@ -202,7 +202,7 @@ using Kokkos::Tools::SpaceHandle;
 #define EXPOSE_MARK_KERNEL_STATIC_INFO(FUNC_NAME)             \
   __attribute__((weak)) void kokkosp_mark_kernel_static_info( \
       const uint64_t kernelID,                                \
-      const Kokkos_Profiling_Kernel_Static_Info *info) {      \
+      const Kokkos_Profiling_Kernel_Static_Info* info) {      \
     FUNC_NAME(kernelID, info);                                \
   }