diff --git a/apps/search_disk_index.cpp b/apps/search_disk_index.cpp
index 6b0793db7..bd3aafc2e 100644
--- a/apps/search_disk_index.cpp
+++ b/apps/search_disk_index.cpp
@@ -179,7 +179,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre
     std::string recall_string = "Recall@" + std::to_string(recall_at);
     diskann::cout << std::setw(6) << "L" << std::setw(12) << "Beamwidth" << std::setw(16) << "QPS" << std::setw(16)
                   << "Mean Latency" << std::setw(16) << "99.9 Latency" << std::setw(16) << "Mean IOs" << std::setw(16)
-                  << "Mean IO (us)" << std::setw(16) << "CPU (s)";
+                  << "Mean IO (us)" << std::setw(16) << "CPU (s)" << std::setw(18) << "PQ Training (us)";
     if (calc_recall_flag)
     {
         diskann::cout << std::setw(16) << recall_string << std::endl;
@@ -272,6 +272,9 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre
 
         auto mean_io_us = diskann::get_mean_stats<float>(stats, query_num,
                                                          [](const diskann::QueryStats &stats) { return stats.io_us; });
+        // Mean per-query PQ setup time in microseconds, printed under the "PQ Training (us)" column.
+        auto mean_pq_training_us = diskann::get_mean_stats<float>(
+            stats, query_num, [](const diskann::QueryStats &stats) { return stats.pq_training_us; });
 
         double recall = 0;
         if (calc_recall_flag)
@@ -283,7 +286,8 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre
 
         diskann::cout << std::setw(6) << L << std::setw(12) << optimized_beamwidth << std::setw(16) << qps
                       << std::setw(16) << mean_latency << std::setw(16) << latency_999 << std::setw(16) << mean_ios
-                      << std::setw(16) << mean_io_us << std::setw(16) << mean_cpuus;
+                      << std::setw(16) << mean_io_us << std::setw(16) << mean_cpuus << std::setw(18)
+                      << mean_pq_training_us;
         if (calc_recall_flag)
         {
             diskann::cout << std::setw(16) << recall << std::endl;
diff --git a/include/percentile_stats.h b/include/percentile_stats.h
index 793257577..fbd740930 100644
--- a/include/percentile_stats.h
+++ b/include/percentile_stats.h
@@ -23,6 +23,7 @@ struct QueryStats
     float total_us = 0; // total time to process query in micros
     float io_us = 0;    // total time spent in IO
     float cpu_us = 0;   // total time spent in CPU
+    float pq_training_us = 0; // time spent on per-query PQ setup (query preprocessing + populating chunk distances)
 
     unsigned n_4k = 0;         // # of 4kB reads
     unsigned n_8k = 0;         // # of 8kB reads
diff --git a/src/pq_flash_index.cpp b/src/pq_flash_index.cpp
index 42e006131..c5b83836d 100644
--- a/src/pq_flash_index.cpp
+++ b/src/pq_flash_index.cpp
@@ -1275,7 +1275,7 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
     if (beam_width > num_sector_per_nodes * defaults::MAX_N_SECTOR_READS)
         throw ANNException("Beamwidth can not be higher than defaults::MAX_N_SECTOR_READS", -1, __FUNCSIG__, __FILE__,
                            __LINE__);
-    Timer query_timer, io_timer, cpu_timer;
+    Timer query_timer, io_timer, cpu_timer, pq_training_timer; // pq_training_timer: per-query PQ setup phase only
     ScratchStoreManager<SSDThreadData<T>> manager(this->_thread_data);
     auto data = manager.scratch_space();
     IOContext &ctx = data->ctx;
@@ -1334,6 +1334,7 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
         _nnodes_per_sector > 0 ? 1 : DIV_ROUND_UP(_max_node_len, defaults::SECTOR_LEN);
 
     cpu_timer.reset();
+    pq_training_timer.reset(); // restarted together with cpu_timer, just before the PQ query setup below
     // query <-> PQ chunk centers distances
     _pq_table.preprocess_query(query_rotated); // center the query and rotate if
                                                // we have a rotation matrix
@@ -1341,6 +1342,7 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
     _pq_table.populate_chunk_distances(query_rotated, pq_dists);
     if (stats != nullptr)
     {
+        stats->pq_training_us = (float)pq_training_timer.elapsed();
         stats->cpu_us += (float)cpu_timer.elapsed();
     }