From 12cb1959a8363eae94c2b3b7df89dc5535c12683 Mon Sep 17 00:00:00 2001
From: aireeenmei
Date: Tue, 19 Dec 2023 13:16:57 +0000
Subject: [PATCH] minor change and cleanup

MaxText/input_pipeline.py:
- Drop the commented-out tfds.data_source / lazy_dataset code at the end
  of get_datasets_pygrain.

setup_gcsfuse.sh:
- The script is now meant to be invoked as root ("sudo bash ..."); the
  per-command sudo calls are removed.
- Enable command tracing with "set -x".
- Only configure the apt repository and install gcsfuse when the
  gcsfuse command is not already available, installing lsb-release,
  gnupg and curl first and clearing the apt lists afterwards.
- Check DATASET_GCS_BUCKET (the variable the script actually validates)
  for a gs:// prefix; the previous test read GCS_BUCKET, so the
  "Remove gs://" guard could never trigger.

---
Review note: this patch was rebuilt from a copy whose newlines had been
collapsed. Leading indentation and blank context lines were restored by
inference; verify them against the target tree before applying.

 MaxText/input_pipeline.py |  9 ---------
 setup_gcsfuse.sh          | 24 ++++++++++++++----------
 2 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/MaxText/input_pipeline.py b/MaxText/input_pipeline.py
index 9d9e1faa0..066b2d3c1 100644
--- a/MaxText/input_pipeline.py
+++ b/MaxText/input_pipeline.py
@@ -288,15 +288,6 @@ def get_datasets_pygrain(
   else:
     eval_ds = train_ds
 
-  # train_ds = tfds.data_source(config.dataset_name, split="train")
-  # if config.eval_dataset_name:
-  #   eval_ds = tfds.data_source(config.dataset_name, split=config.eval_split)
-  # else:
-  #   eval_ds = train_ds
-
-  # lazy_dataset = pygrain.experimental.lazy_dataset
-  # train_ds = lazy_dataset.SourceLazyMapDataset(train_ds)
-  # eval_ds = lazy_dataset.SourceLazyMapDataset(eval_ds)
   return train_ds, eval_ds
 
 
diff --git a/setup_gcsfuse.sh b/setup_gcsfuse.sh
index 0467143da..806bf44c4 100644
--- a/setup_gcsfuse.sh
+++ b/setup_gcsfuse.sh
@@ -15,9 +15,9 @@
 # limitations under the License.
 
 # Description:
-# bash setup_gcsfuse.sh DATASET_GCS_BUCKET=maxtext-dataset MOUNT_PATH=dataset
+# sudo bash setup_gcsfuse.sh DATASET_GCS_BUCKET=maxtext-dataset MOUNT_PATH=dataset
 
-set -e
+set -e -x
 
 # Set environment variables
 for ARGUMENT in "$@"; do
@@ -31,18 +31,22 @@ if [[ -z ${DATASET_GCS_BUCKET} || -z ${MOUNT_PATH} ]]; then
   exit 1
 fi
 
-if [[ $GCS_BUCKET == gs://* ]] ;
-then
+if [[ $DATASET_GCS_BUCKET == gs://* ]] ; then
   echo "Remove gs:// from GCS bucket name"
   exit 1
 fi
 
-sudo apt-get -y install fuse
-export GCSFUSE_REPO=gcsfuse-`lsb_release -c -s`
-echo "deb https://packages.cloud.google.com/apt $GCSFUSE_REPO main" | sudo tee /etc/apt/sources.list.d/gcsfuse.list
-curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
-sudo apt-get update
-sudo apt-get -y install gcsfuse
+if ! command -v gcsfuse &> /dev/null ; then
+  apt-get update -y && \
+    apt-get install -y lsb-release && \
+    apt-get install -y gnupg && \
+    apt-get install -y curl
+  export GCSFUSE_REPO=gcsfuse-`lsb_release -c -s`
+  echo "deb https://packages.cloud.google.com/apt $GCSFUSE_REPO main" | tee /etc/apt/sources.list.d/gcsfuse.list
+  curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
+  apt-get update -y && apt-get -y install gcsfuse
+  rm -rf /var/lib/apt/lists/*
+fi
 
 mkdir -p $MOUNT_PATH
 