Improve CI Performance. (#227)
* Improve CI Performance.

1. Speed up generation of HDF5 files; it was taking too long (a sketch of the change follows below).
2. Do not finalize MPI in pytest; this allows multiple tests to run in one process (see the note after the tests diff).

* Reduce the debug level.

* Revert test change.

* Reduce read threads; there is not enough data for more.

* Increase the number of files so the tests still perform some I/O.
hariharan-devarajan committed Sep 3, 2024
1 parent d6924f3 commit bbe9bae
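The first bullet refers to the hdf5_generator.py diff below: instead of pre-allocating the records dataset and filling it slice by slice from a generation buffer, each file is now written with a single create_dataset() call. A minimal standalone sketch of the two patterns (file names and sizes here are illustrative, not taken from the benchmark):

# Sketch: old vs. new HDF5 write pattern from this commit.
# Sizes and file names are illustrative.
import numpy as np
import h5py

num_samples, dim1, dim2 = 64, 224, 224
records = np.random.randint(255, size=(num_samples, dim1, dim2), dtype=np.uint8)

# Old pattern: create an empty dataset, then fill it in buffered slices.
with h5py.File("old_style.h5", "w") as hf:
    dset = hf.create_dataset("records", (num_samples, dim1, dim2), dtype=np.uint8)
    step = 16  # stand-in for samples_per_iter
    written = 0
    while written < num_samples:
        count = min(step, num_samples - written)
        dset[written:written + count] = records[written:written + count]
        written += count

# New pattern: hand h5py the whole array up front; one bulk write per file.
with h5py.File("new_style.h5", "w") as hf:
    hf.create_dataset("records", (num_samples, dim1, dim2), dtype=np.uint8,
                      data=records)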
Showing 3 changed files with 23 additions and 31 deletions.
.github/workflows/ci.yml (5 additions, 5 deletions)
@@ -21,9 +21,9 @@ jobs:
       CXX: g++-${{ matrix.gcc }}
       DFTRACER_BUILD_TYPE: "Debug"
       DFTRACER_ENABLE: 1
-      DFTRACER_LOG_LEVEL: "DEBUG"
+      DFTRACER_LOG_LEVEL: "INFO"
       DLIO_EXEC: ${{ matrix.venv == 'via-setup' && 'dlio_benchmark' || 'python dlio_benchmark/main.py' }}
-      GOTCHA_DEBUG: 3
+      GOTCHA_DEBUG: 1
       OMPI_ALLOW_RUN_AS_ROOT: 1
       OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
       PYTHON_VER: ${{ matrix.python }}
@@ -227,9 +227,9 @@ jobs:
        run: |
          source ${VENV_PATH}/bin/activate
          rm -rf output data checkpoints
-          mpirun -np 2 ${DLIO_EXEC} workload=resnet50_a100 ++workload.workflow.generate_data=True ++workload.dataset.num_files_train=4
-          mpirun -np 2 ${DLIO_EXEC} workload=resnet50_h100 ++workload.workflow.generate_data=True ++workload.dataset.num_files_train=4
-          mpirun -np 2 ${DLIO_EXEC} workload=resnet50_h100 ++workload.workflow.generate_data=True ++workload.dataset.num_files_train=4 ++workload.dataset.format=synthetic
+          mpirun -np 2 ${DLIO_EXEC} workload=resnet50_a100 ++workload.workflow.generate_data=True ++workload.dataset.num_files_train=16 ++workload.reader.read_threads=1
+          mpirun -np 2 ${DLIO_EXEC} workload=resnet50_h100 ++workload.workflow.generate_data=True ++workload.dataset.num_files_train=16 ++workload.reader.read_threads=1
+          mpirun -np 2 ${DLIO_EXEC} workload=resnet50_h100 ++workload.workflow.generate_data=True ++workload.dataset.num_files_train=16 ++workload.reader.read_threads=1 ++workload.dataset.format=synthetic
          rm -rf data
      - name: test_cosmoflow
        run: |
dlio_benchmark/data_generator/hdf5_generator.py (16 additions, 25 deletions)
@@ -45,38 +45,29 @@ def generate(self):
         """
         super().generate()
         np.random.seed(10)
-        samples_per_iter=max(1, int(self._args.generation_buffer_size/self._args.record_length))
         record_labels = [0] * self.num_samples
         dim = self.get_dimension(self.total_files_to_generate)
+        chunks = None
+        if self.enable_chunking:
+            chunk_dimension = int(math.ceil(math.sqrt(self.chunk_size)))
+            if chunk_dimension > self._dimension:
+                chunk_dimension = self._dimension
+            chunks = (1, chunk_dimension, chunk_dimension)
+        compression = None
+        compression_level = None
+        if self.compression != Compression.NONE:
+            compression = str(self.compression)
+            if self.compression == Compression.GZIP:
+                compression_level = self.compression_level
         for i in dlp.iter(range(self.my_rank, int(self.total_files_to_generate), self.comm_size)):
+            progress(i, self.total_files_to_generate, "Generating HDF5 Data")
             dim1 = dim[2*i]
             dim2 = dim[2*i+1]
-            records = np.random.randint(255, size=(samples_per_iter, dim1, dim2), dtype=np.uint8)
+            records = np.random.randint(255, size=(dim1, dim2, self.num_samples), dtype=np.uint8)
             out_path_spec = self.storage.get_uri(self._file_list[i])
-            progress(i+1, self.total_files_to_generate, "Generating NPZ Data")
             hf = h5py.File(out_path_spec, 'w')
-            chunks = None
-            if self.enable_chunking:
-                chunk_dimension = int(math.ceil(math.sqrt(self.chunk_size)))
-                if chunk_dimension > self._dimension:
-                    chunk_dimension = self._dimension
-                chunks = (1, chunk_dimension, chunk_dimension)
-            compression = None
-            compression_level = None
-            if self.compression != Compression.NONE:
-                compression = str(self.compression)
-                if self.compression == Compression.GZIP:
-                    compression_level = self.compression_level
-            dset = hf.create_dataset('records', (self.num_samples, dim1, dim2), chunks=chunks, compression=compression,
-                                     compression_opts=compression_level, dtype=np.uint8)
-            samples_written = 0
-            while samples_written < self.num_samples:
-                if samples_per_iter < self.num_samples-samples_written:
-                    samples_to_write = samples_per_iter
-                else:
-                    samples_to_write = self.num_samples-samples_written
-                dset[samples_written:samples_written+samples_to_write] = records[:samples_to_write]
-                samples_written += samples_to_write
+            hf.create_dataset('records', (self.num_samples, dim1, dim2), chunks=chunks, compression=compression,
+                              compression_opts=compression_level, dtype=np.uint8, data=records)
             hf.create_dataset('labels', data=record_labels)
             hf.close()
         np.random.seed()
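One detail worth flagging in the new code: the random array is generated with shape (dim1, dim2, self.num_samples) while the dataset is declared as (self.num_samples, dim1, dim2). As far as I can tell, h5py accepts this because create_dataset only checks that the total element counts of shape and data agree, then writes the buffer in flat C order; since the records are uniform random noise, the axis order is harmless for a benchmark. A quick illustrative check (treat the element-count claim as an assumption for very old h5py versions):

# Illustrative: h5py tolerates a data array whose shape differs from the
# declared dataset shape when the element counts match.
import numpy as np
import h5py

a = np.arange(24, dtype=np.uint8).reshape(2, 3, 4)
with h5py.File("shape_check.h5", "w") as hf:
    dset = hf.create_dataset("records", (4, 3, 2), dtype=np.uint8, data=a)
    print(dset.shape)     # (4, 3, 2)
    print(dset[0, 0, 0])  # 0; the buffer is stored in a's flat C order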
tests/dlio_benchmark_test.py (2 additions, 1 deletion)
@@ -48,7 +48,8 @@ def init():
     DLIOMPI.get_instance().initialize()
 
 def finalize():
-    DLIOMPI.get_instance().finalize()
+    # DLIOMPI.get_instance().finalize()
+    pass
 
 def clean(storage_root="./") -> None:
     comm.Barrier()
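The second bullet of the commit message explains this hunk: MPI_Finalize may run only once per process, and no MPI calls are legal after it. Since pytest executes every test in the same interpreter, finalizing at the end of one test would break any later test that still needs MPI. A hypothetical sketch of keeping a single finalize at interpreter exit, assuming mpi4py (the helper below is illustrative, not part of dlio_benchmark):

# Hypothetical: defer MPI finalization to process exit so many tests can
# share one MPI environment. Not the code used by dlio_benchmark.
import atexit
from mpi4py import MPI

def _finalize_once() -> None:
    # MPI_Finalize is a once-per-process call; guard against a second run.
    if MPI.Is_initialized() and not MPI.Is_finalized():
        MPI.Finalize()

atexit.register(_finalize_once)

Assuming DLIOMPI wraps mpi4py, which by default finalizes MPI at interpreter exit anyway, simply dropping the explicit per-test call, as this diff does, is sufficient.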
