From fb1c1a63489e04ffcddda3ef36412d7cdb9df290 Mon Sep 17 00:00:00 2001 From: hariharandev1 Date: Tue, 30 Jul 2024 12:14:29 -0700 Subject: [PATCH] Remove shuffling from tfreader as it is already shuffled before --- dlio_benchmark/reader/tf_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlio_benchmark/reader/tf_reader.py b/dlio_benchmark/reader/tf_reader.py index ce37b925..c7e2e914 100644 --- a/dlio_benchmark/reader/tf_reader.py +++ b/dlio_benchmark/reader/tf_reader.py @@ -82,7 +82,7 @@ def _parse_image(self, serialized): @dlp.log def next(self): logging.debug(f"{utcnow()} Reading {len(self._file_list)} files thread {self.thread_index} rank {self._args.my_rank}") - filenames = tf.data.Dataset.list_files(self._file_list, shuffle=True) + filenames = tf.data.Dataset.list_files(self._file_list, shuffle=False) # sharding in the file list if we have enought files. if (len(self._file_list) >= self._args.comm_size): filenames = filenames.shard(num_shards=self._args.comm_size, index=self._args.my_rank)