Skip to content

Commit

Permalink
Added NVTX instrumentation
Browse files (browse the repository at this point in the history)
  • Loading branch information
jirikraus committed Mar 20, 2014
1 parent b740d2c commit d20eab8
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 56 deletions.
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ DEBUG =

# Includes and libraries
INCLUDE = -I$(INCLUDE_DIR) -I$(THRUST_DIR) -I${DEDISP_DIR}/include -I${CUDA_DIR}/include -I./tclap
LIBS = -L$(CUDA_DIR)/lib64 -lcuda -lcudart -L${DEDISP_DIR}/lib -ldedisp -lcufft -lpthread
LIBS = -L$(CUDA_DIR)/lib64 -lcuda -lcudart -L${DEDISP_DIR}/lib -ldedisp -lcufft -lpthread -lnvToolsExt

# compiler flags
# --compiler-options -Wall
NVCC_COMP_FLAGS = -gencode=arch=compute_20,code=sm_20 -gencode=arch=compute_30,code=sm_30
NVCCFLAGS = ${OPTIMISE} ${NVCC_COMP_FLAGS} --machine 64 -Xcompiler ${DEBUG}
CFLAGS = -fPIC ${OPTIMISE} ${DEBUG}
NVCCFLAGS = ${UCFLAGS} ${OPTIMISE} ${NVCC_COMP_FLAGS} --machine 64 -Xcompiler ${DEBUG}
CFLAGS = ${UCFLAGS} -fPIC ${OPTIMISE} ${DEBUG}

OBJECTS = ${OBJ_DIR}/kernels.o
EXE_FILES = ${BIN_DIR}/peasoup ${BIN_DIR}/resampling_test ${BIN_DIR}/harmonic_sum_test
Expand Down
11 changes: 6 additions & 5 deletions Makefile.inc
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
# This is where common definitions go

#cuda setup
CUDA_DIR = /usr/local/cuda-5.0/
THRUST_DIR = /usr/local/cuda-5.0/include/
CUDA_DIR = $(CUDAROOT)
THRUST_DIR = $(CUDAROOT)/include/

#dedisp setup
DEDISP_DIR = /mnt/home/ebarr/Soft/dedisp
DEDISP_DIR = /homeb/zam/jkraus/workspace/PulsarSearch/dedisp

GCC = gcc
GXX = g++
AR = ar
NVCC = /usr/local/cuda-5.0/bin/nvcc
SHELL = /bin/csh
NVCC = $(CUDAROOT)/bin/nvcc
SHELL = /bin/bash
UCFLAGS = -DUSE_NVTX
3 changes: 3 additions & 0 deletions include/transforms/harmonicfolder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <kernels/kernels.h>
#include <kernels/defaults.h>
#include <iostream>
#include <utils/nvtx.hpp>

class HarmonicFolder {
private:
Expand All @@ -24,6 +25,7 @@ class HarmonicFolder {

void fold(DevicePowerSpectrum<float>& fold0)
{
PUSH_NVTX_RANGE("Harmonic summing",2)
float** h_data_ptrs;
float** d_data_ptrs;
Utils::device_malloc<float*>(&d_data_ptrs,sums.size());
Expand All @@ -37,6 +39,7 @@ class HarmonicFolder {
device_harmonic_sum(fold0.get_data(),d_data_ptrs,
fold0.get_nbins(),sums.size(),
max_blocks,max_threads);
POP_NVTX_RANGE
}

~HarmonicFolder()
Expand Down
24 changes: 24 additions & 0 deletions include/utils/nvtx.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#pragma once
/*
 * Optional NVTX range annotations.
 *
 * When USE_NVTX is defined at compile time, PUSH_NVTX_RANGE(name, cid) opens a
 * named, coloured range via nvtxRangePushEx() and POP_NVTX_RANGE closes the
 * most recently opened one via nvtxRangePop().  When USE_NVTX is not defined
 * both macros expand to nothing, so instrumented code builds without the NVTX
 * headers or library.
 *
 * Both macros expand to complete statements (or to nothing); call sites do not
 * need a trailing semicolon.
 */
#ifdef USE_NVTX
#include <stdint.h>
#include "nvToolsExt.h"

/* ARGB colour palette; a range's colour is picked by its colour id modulo the palette size. */
static const uint32_t colors[] = { 0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff };
static const int num_colors = sizeof(colors)/sizeof(uint32_t);

/*
 * name : ASCII label attached to the range (const char*).
 * cid  : integer colour id; any value is accepted and wrapped into
 *        [0, num_colors).  The double modulo keeps the index non-negative,
 *        because in C++ the result of % takes the sign of the dividend and a
 *        negative cid would otherwise index before the start of colors[].
 *
 * Macro-local variables carry an nvtx_ prefix and trailing underscore so they
 * are unlikely to shadow identifiers used inside the name/cid arguments.
 */
#define PUSH_NVTX_RANGE(name,cid) { \
    int nvtx_color_id_ = (cid); \
    nvtx_color_id_ = ((nvtx_color_id_ % num_colors) + num_colors) % num_colors; \
    nvtxEventAttributes_t nvtx_event_attrib_ = {0}; \
    nvtx_event_attrib_.version = NVTX_VERSION; \
    nvtx_event_attrib_.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \
    nvtx_event_attrib_.colorType = NVTX_COLOR_ARGB; \
    nvtx_event_attrib_.color = colors[nvtx_color_id_]; \
    nvtx_event_attrib_.messageType = NVTX_MESSAGE_TYPE_ASCII; \
    nvtx_event_attrib_.message.ascii = (name); \
    nvtxRangePushEx(&nvtx_event_attrib_); \
}
#define POP_NVTX_RANGE nvtxRangePop();
#else
/* NVTX disabled: the instrumentation macros compile away entirely. */
#define PUSH_NVTX_RANGE(name,cid)
#define POP_NVTX_RANGE
#endif
101 changes: 53 additions & 48 deletions src/pipeline_multi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,100 +134,103 @@ class Worker {
float padding_mean;
int ii;

PUSH_NVTX_RANGE("DM-Loop",0)
while (true){
ii = manager.get_dm_trial_idx();
if (ii==-1)
break;
break;
trials.get_idx(ii,tim);

if (args.verbose)
std::cout << "Copying DM trial to device (DM: " << tim.get_dm() << ")"<< std::endl;
std::cout << "Copying DM trial to device (DM: " << tim.get_dm() << ")"<< std::endl;
d_tim.copy_from_host(tim);

if (padding){
padding_mean = stats::mean<float>(d_tim.get_data(),trials.get_nsamps());
d_tim.fill(trials.get_nsamps(),d_tim.get_nsamps(),padding_mean);
padding_mean = stats::mean<float>(d_tim.get_data(),trials.get_nsamps());
d_tim.fill(trials.get_nsamps(),d_tim.get_nsamps(),padding_mean);
}

if (args.verbose)
std::cout << "Generating accelration list" << std::endl;
std::cout << "Generating accelration list" << std::endl;
acc_plan.generate_accel_list(tim.get_dm(),acc_list);

if (args.verbose)
std::cout << "Searching "<< acc_list.size()<< " acceleration trials for DM "<< tim.get_dm() << std::endl;
std::cout << "Searching "<< acc_list.size()<< " acceleration trials for DM "<< tim.get_dm() << std::endl;

if (args.verbose)
std::cout << "Executing forward FFT" << std::endl;
std::cout << "Executing forward FFT" << std::endl;
r2cfft.execute(d_tim.get_data(),d_fseries.get_data());

if (args.verbose)
std::cout << "Forming power spectrum" << std::endl;
std::cout << "Forming power spectrum" << std::endl;
former.form(d_fseries,pspec);

if (args.verbose)
std::cout << "Finding running median" << std::endl;
std::cout << "Finding running median" << std::endl;
rednoise.calculate_median(pspec);

if (args.verbose)
std::cout << "Dereddening Fourier series" << std::endl;
std::cout << "Dereddening Fourier series" << std::endl;
rednoise.deredden(d_fseries);

if (args.zapfilename!=""){
if (args.verbose)
std::cout << "Zapping birdies" << std::endl;
bzap->zap(d_fseries);
if (args.verbose)
std::cout << "Zapping birdies" << std::endl;
bzap->zap(d_fseries);
}

if (args.verbose)
std::cout << "Forming interpolated power spectrum" << std::endl;
std::cout << "Forming interpolated power spectrum" << std::endl;
former.form_interpolated(d_fseries,pspec);

if (args.verbose)
std::cout << "Finding statistics" << std::endl;
std::cout << "Finding statistics" << std::endl;
stats::stats<float>(pspec.get_data(),size/2+1,&mean,&rms,&std);

if (args.verbose)
std::cout << "Executing inverse FFT" << std::endl;
std::cout << "Executing inverse FFT" << std::endl;
c2rfft.execute(d_fseries.get_data(),d_tim.get_data());

CandidateCollection accel_trial_cands;
CandidateCollection accel_trial_cands;
PUSH_NVTX_RANGE("Acceleration-Loop",1)
for (int jj=0;jj<acc_list.size();jj++){
if (args.verbose)
std::cout << "Resampling to "<< acc_list[jj] << " m/s/s" << std::endl;
resampler.resample(d_tim,d_tim_r,size,acc_list[jj]);


if (args.verbose)
std::cout << "Execute forward FFT" << std::endl;
r2cfft.execute(d_tim_r.get_data(),d_fseries.get_data());

if (args.verbose)
std::cout << "Form interpolated power spectrum" << std::endl;
former.form_interpolated(d_fseries,pspec);

if (args.verbose)
std::cout << "Normalise power spectrum" << std::endl;
stats::normalise(pspec.get_data(),mean*size,std*size,size/2+1);

if (args.verbose)
std::cout << "Harmonic summing" << std::endl;
harm_folder.fold(pspec);

if (args.verbose)
std::cout << "Finding peaks" << std::endl;
SpectrumCandidates trial_cands(tim.get_dm(),ii,acc_list[jj]);
cand_finder.find_candidates(pspec,trial_cands);
cand_finder.find_candidates(sums,trial_cands);
if (args.verbose)
std::cout << "Resampling to "<< acc_list[jj] << " m/s/s" << std::endl;
resampler.resample(d_tim,d_tim_r,size,acc_list[jj]);

if (args.verbose)
std::cout << "Execute forward FFT" << std::endl;
r2cfft.execute(d_tim_r.get_data(),d_fseries.get_data());

if (args.verbose)
std::cout << "Form interpolated power spectrum" << std::endl;
former.form_interpolated(d_fseries,pspec);

if (args.verbose)
std::cout << "Normalise power spectrum" << std::endl;
stats::normalise(pspec.get_data(),mean*size,std*size,size/2+1);

if (args.verbose)
std::cout << "Harmonic summing" << std::endl;
harm_folder.fold(pspec);

if (args.verbose)
std::cout << "Finding peaks" << std::endl;
SpectrumCandidates trial_cands(tim.get_dm(),ii,acc_list[jj]);
cand_finder.find_candidates(pspec,trial_cands);
cand_finder.find_candidates(sums,trial_cands);

if (args.verbose)
std::cout << "Distilling harmonics" << std::endl;
accel_trial_cands.append(harm_finder.distill(trial_cands.cands));
if (args.verbose)
std::cout << "Distilling harmonics" << std::endl;
accel_trial_cands.append(harm_finder.distill(trial_cands.cands));
}
POP_NVTX_RANGE
if (args.verbose)
std::cout << "Distilling accelerations" << std::endl;
std::cout << "Distilling accelerations" << std::endl;
dm_trial_cands.append(acc_still.distill(accel_trial_cands.cands));
}

POP_NVTX_RANGE

if (args.zapfilename!="")
delete bzap;

Expand Down Expand Up @@ -299,7 +302,9 @@ int main(int argc, char **argv)
printf("Starting dedispersion...\n");

timers["dedispersion"].start();
PUSH_NVTX_RANGE("Dedisperse",3)
DispersionTrials<unsigned char> trials = dedisperser.dedisperse();
POP_NVTX_RANGE
timers["dedispersion"].stop();

if (args.progress_bar)
Expand Down

0 comments on commit d20eab8

Please sign in to comment.