Skip to content

Commit

Permalink
minor changes to address ahmad's comments
Browse files Browse the repository at this point in the history
  • Loading branch information
deekay42 committed Jun 11, 2024
1 parent f190d99 commit 0a88d27
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 7 deletions.
24 changes: 18 additions & 6 deletions torchvision/csrc/io/image/cuda/encode_jpegs_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,10 @@ std::vector<torch::Tensor> encode_jpegs_cuda(
// object correctly upon program exit. This is because, when cudaJpegEncoder
// gets destroyed, the CUDA runtime may already be shut down, rendering all
// destroy* calls in the encoder destructor invalid. Instead, we use an
// atexit hook which executes after main() finishes, but before CUDA shuts
// down when the program exits.
// atexit hook which executes after main() finishes, but hopefully before
// CUDA shuts down when the program exits. If CUDA has already shut down, the
// destructor detects this and does not attempt to destroy any encoder
// structures.
std::atexit([]() { delete cudaJpegEncoder.release(); });
}

Expand Down Expand Up @@ -90,7 +92,7 @@ std::vector<torch::Tensor> encode_jpegs_cuda(
}
}

cudaJpegEncoder->setQuality(quality);
cudaJpegEncoder->set_quality(quality);
std::vector<torch::Tensor> encoded_images;
at::cuda::CUDAEvent event;
event.record(cudaJpegEncoder->stream);
Expand All @@ -103,8 +105,9 @@ std::vector<torch::Tensor> encode_jpegs_cuda(
// may be ready on that stream we cannot assume that they are also available
// on the current stream of the calling context when this function returns. We
// use a blocking event to ensure that this is indeed the case. Crucially, we
// do not want to block the host (which is what cudaStreamSynchronize would
// do) Events allow us to synchronize the streams without blocking the host
// do not want to block the host at this particular point (which is what
// cudaStreamSynchronize would do). Events allow us to synchronize the
// streams without blocking the host.
event.block(at::cuda::getCurrentCUDAStream(
cudaJpegEncoder->original_device.has_index()
? cudaJpegEncoder->original_device.index()
Expand Down Expand Up @@ -140,6 +143,15 @@ CUDAJpegEncoder::CUDAJpegEncoder(const torch::Device& target_device)
}

CUDAJpegEncoder::~CUDAJpegEncoder() {
// We call cudaGetDeviceCount purely as a probe to test whether the CUDA
// runtime is still initialized. If it is not, we skip the rest of this
// function, because executing it would be unsafe.
int deviceCount = 0;
cudaError_t error = cudaGetDeviceCount(&deviceCount);
if (error != cudaSuccess)
return; // CUDA runtime has already shut down. There's nothing we can do
// now.

nvjpegStatus_t status;

status = nvjpegEncoderParamsDestroy(nv_enc_params);
Expand Down Expand Up @@ -235,7 +247,7 @@ torch::Tensor CUDAJpegEncoder::encode_jpeg(const torch::Tensor& src_image) {
return encoded_image;
}

void CUDAJpegEncoder::setQuality(const int64_t quality) {
void CUDAJpegEncoder::set_quality(const int64_t quality) {
nvjpegStatus_t paramsQualityStatus =
nvjpegEncoderParamsSetQuality(nv_enc_params, quality, stream);
TORCH_CHECK(
Expand Down
2 changes: 1 addition & 1 deletion torchvision/csrc/io/image/cuda/encode_jpegs_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class CUDAJpegEncoder {

torch::Tensor encode_jpeg(const torch::Tensor& src_image);

void setQuality(const int64_t);
void set_quality(const int64_t quality);

const torch::Device original_device;
const torch::Device target_device;
Expand Down

0 comments on commit 0a88d27

Please sign in to comment.