diff --git a/.ci/validate-examples.py b/.ci/validate-examples.py
index f5f3b0f6..5d38d961 100644
--- a/.ci/validate-examples.py
+++ b/.ci/validate-examples.py
@@ -51,7 +51,6 @@ class ErrorCollection:
-
     _errors: List[str] = []
 
     def add(self, error: str):
@@ -253,7 +252,6 @@ def _lint_python_cell(file_name: str, code_lines: List[str]) -> List[str]:
 
 
 if __name__ == "__main__":
-
     ERRORS = ErrorCollection()
 
     res = requests.get(url=RECIPE_SCHEMA_URL)
diff --git a/examples/api/houseprice.py b/examples/api/houseprice.py
index 505fb8d8..52392d4f 100644
--- a/examples/api/houseprice.py
+++ b/examples/api/houseprice.py
@@ -21,7 +21,6 @@ async def predict(BedroomAbvGr: int = None, YearBuilt: int = None):
     a = pd.DataFrame([[BedroomAbvGr, YearBuilt]], columns=["BedroomAbvGr", "YearBuilt"])
     v = lr.predict(a)
     if not ((0 <= BedroomAbvGr <= 8) and (1872 <= YearBuilt <= 2100)):
-
         raise HTTPException(
             status_code=400,
             detail="Please enter BedroomAbvGr between 0 and 8. Enter YearBuilt between 1872 and 2100",
diff --git a/examples/rapids/01-rapids-single-gpu.ipynb b/examples/rapids/01-rapids-single-gpu.ipynb
index 30918f3b..d0dcac66 100644
--- a/examples/rapids/01-rapids-single-gpu.ipynb
+++ b/examples/rapids/01-rapids-single-gpu.ipynb
@@ -133,7 +133,6 @@
    "outputs": [],
    "source": [
     "def prep_df(df: cudf.DataFrame) -> cudf.DataFrame:\n",
-    "\n",
     "    df = df[df[\"fare_amount\"] > 0]  # to avoid a divide by zero error\n",
     "    df[\"tip_fraction\"] = df[\"tip_amount\"] / df[\"fare_amount\"]\n",
     "    df[\"target\"] = df[\"tip_fraction\"] > 0.2\n",
diff --git a/examples/rapids/02-rapids-gpu-cluster.ipynb b/examples/rapids/02-rapids-gpu-cluster.ipynb
index c10c972f..5ad3dd07 100644
--- a/examples/rapids/02-rapids-gpu-cluster.ipynb
+++ b/examples/rapids/02-rapids-gpu-cluster.ipynb
@@ -211,7 +211,6 @@
    "outputs": [],
    "source": [
     "def prep_df(df: dask_cudf.DataFrame) -> dask_cudf.DataFrame:\n",
-    "\n",
     "    df = df[df[\"fare_amount\"] > 0]  # to avoid a divide by zero error\n",
     "    df[\"tip_fraction\"] = df[\"tip_amount\"] / df[\"fare_amount\"]\n",
     "    df[\"target\"] = df[\"tip_fraction\"] > 0.2\n",
diff --git a/examples/registering-images/register.py b/examples/registering-images/register.py
index cab018c8..aa76b8cb 100644
--- a/examples/registering-images/register.py
+++ b/examples/registering-images/register.py
@@ -14,12 +14,12 @@
 with open("/home/jovyan/image_spec.json") as f:
     IMAGE_SPEC = json.load(f)
-
+
 with open("/home/jovyan/base_image_spec.json") as f:
     BASE_IMAGE_SPEC = json.load(f)
-
-DRY_RUN = os.getenv('DRY_RUN', 'TRUE').lower() == 'true'
+
+DRY_RUN = os.getenv("DRY_RUN", "TRUE").lower() == "true"
 
 # this should be populated by Saturn.
@@ -35,17 +35,15 @@ def list_images(ecr_image_name: str) -> List[Dict[str, str]]:
     - image_uri
     - image_tag
     """
-    ecr = boto3.client('ecr')
+    ecr = boto3.client("ecr")
 
-    repository = ecr.describe_repositories(repositoryNames=[ecr_image_name])[
-        'repositories'
-    ][0]
-    repository_uri = repository['repositoryUri']
+    repository = ecr.describe_repositories(repositoryNames=[ecr_image_name])["repositories"][0]
+    repository_uri = repository["repositoryUri"]
 
     list_images = ecr.get_paginator("list_images")
     for page in list_images.paginate(repositoryName=ecr_image_name):
-        for image_id in page['imageIds']:
-            tag = image_id.get('imageTag', None)
+        for image_id in page["imageIds"]:
+            tag = image_id.get("imageTag", None)
             if tag:
                 yield dict(image_uri=f"{repository_uri}:{tag}", image_tag=tag)
 
@@ -57,7 +55,6 @@ def make_url(path: str, queries: Optional[Dict[str, str]] = None) -> str:
     return f"{BASE_URL}/{path}"
 
 
-
 def register(image_uri: str, version: str, saturn_image_name: str, dry_run: bool = False):
     """
     looks up Saturn image_id from saturn_image_name.
@@ -68,50 +65,50 @@ def register(image_uri: str, version: str, saturn_image_name: str, dry_run: bool
     q = f"owner:{SATURN_USERNAME} name:{saturn_image_name}"
     url = make_url("api/images", dict(q=q, page_size="-1"))
     images = requests.get(url, headers=saturn_headers).json()
-    images = [x for x in images['images'] if x['name'] == saturn_image_name]
+    images = [x for x in images["images"] if x["name"] == saturn_image_name]
     if not images:
-        raise ValueError(f'no image found for {q}')
+        raise ValueError(f"no image found for {q}")
     elif len(images) > 1:
-        raise ValueError(f'multiple images found for {q}')
+        raise ValueError(f"multiple images found for {q}")
     image = images[0]
-    image_id = image['id']
+    image_id = image["id"]
     q = f"version:{version}"
     url = make_url(f"api/images/{image_id}/tags", dict(q=q, page_size="-1"))
-    tags = requests.get(url, headers=saturn_headers).json()['image_tags']
-    if image_uri in [x['image_uri'] for x in tags]:
-        print(f'found {image_uri}')
+    tags = requests.get(url, headers=saturn_headers).json()["image_tags"]
+    if image_uri in [x["image_uri"] for x in tags]:
+        print(f"found {image_uri}")
         return
     print(f"REGISTER {image_uri} {image}")
     if not dry_run:
         url = make_url(f"api/images/{image_id}/tags")
-        requests.post(url, json={'image_uri': image_uri}, headers=saturn_headers)
+        requests.post(url, json={"image_uri": image_uri}, headers=saturn_headers)
+
 
-
 def get_all_tags(saturn_image_id: str) -> List[Dict]:
     url = make_url(f"api/images/{saturn_image_id}/tags", dict(page_size="-1"))
-    tags = requests.get(url, headers=saturn_headers).json()['image_tags']
+    tags = requests.get(url, headers=saturn_headers).json()["image_tags"]
     return tags
 
 
-def delete_all_tags(saturn_image_id: str, tags: List[Dict], dry_run: bool=False):
+def delete_all_tags(saturn_image_id: str, tags: List[Dict], dry_run: bool = False):
     tags = get_all_tags(saturn_image_id)
     for t in tags:
         url = make_url(f"api/images/{saturn_image_id}/tags/{t['id']}")
-        print('delete', url)
+        print("delete", url)
         if not dry_run:
-            resp = requests.delete(url, headers=saturn_headers)
-
-
+            requests.delete(url, headers=saturn_headers)
+
+
 def register_by_id(image_uri: str, version: str, saturn_image_id: str, dry_run: bool = False):
     """
     Create a new ImageTag object with image_uri and version under saturn_image_name
     """
     if not dry_run:
         url = make_url(f"api/images/{saturn_image_id}/tags")
-        requests.post(url, json={'image_uri': image_uri}, headers=saturn_headers)
+        requests.post(url, json={"image_uri": image_uri}, headers=saturn_headers)
 
 
 def register_base_image(ecr_image_name: str, saturn_image_id: str):
@@ -124,17 +121,17 @@ def register_base_image(ecr_image_name: str, saturn_image_id: str):
     3. Register a new image tag under saturn_image_id
     """
     ecr_images = list(list_images(ecr_image_name))
-    ecr_image = sorted(ecr_images, key=lambda x: x['image_tag'])[-1]
-    image_uri = ecr_image['image_uri']
-    image_tag = ecr_image['image_tag']
+    ecr_image = sorted(ecr_images, key=lambda x: x["image_tag"])[-1]
+    image_uri = ecr_image["image_uri"]
+    image_tag = ecr_image["image_tag"]
     tags = get_all_tags(saturn_image_id)
-    if image_uri in [x['image_uri'] for x in tags]:
-        print(f'found {image_uri}')
+    if image_uri in [x["image_uri"] for x in tags]:
+        print(f"found {image_uri}")
         return
     delete_all_tags(saturn_image_id, tags, dry_run=DRY_RUN)
     register_by_id(image_uri, image_tag, saturn_image_id, dry_run=DRY_RUN)
-
+
 
 def register_all(ecr_image_name: str, saturn_image_name: str):
     """
     for a given ecr image name, retrieve all image_uris/tags from ECR.
@@ -142,22 +139,22 @@
     """
     ecr_images = list_images(ecr_image_name)
     for image in ecr_images:
-        image_uri = image['image_uri']
-        image_tag = image['image_tag']
+        image_uri = image["image_uri"]
+        image_tag = image["image_tag"]
         register(image_uri, image_tag, saturn_image_name, dry_run=DRY_RUN)
-
+
 
 def sync_base():
     for image_spec in BASE_IMAGE_SPEC:
-        ecr_image_name = image_spec['ecr_image_name']
-        saturn_image_id = image_spec['saturn_image_id']
+        ecr_image_name = image_spec["ecr_image_name"]
+        saturn_image_id = image_spec["saturn_image_id"]
         register_base_image(ecr_image_name, saturn_image_id)
-
+
 
 def sync():
     for image_spec in IMAGE_SPEC:
-        ecr_image_name = image_spec['ecr_image_name']
-        saturn_image_name = image_spec['saturn_image_name']
+        ecr_image_name = image_spec["ecr_image_name"]
+        saturn_image_name = image_spec["saturn_image_name"]
         register_all(ecr_image_name, saturn_image_name)
diff --git a/examples/registering-users/register.py b/examples/registering-users/register.py
index 253108d4..f0103202 100644
--- a/examples/registering-users/register.py
+++ b/examples/registering-users/register.py
@@ -14,12 +14,13 @@
 SATURN_TOKEN = os.getenv("SATURN_TOKEN")
 saturn_headers = {"Authorization": f"token {SATURN_TOKEN}"}
 
+
 def check_for_account_by_email(email: str) -> bool:
     url = f"{BASE_URL}/api/users"
     query_string = urlencode(dict(q=f"email:{email}", page=1, size=1))
     url = url + "?" + query_string
     response = requests.get(url, headers=saturn_headers)
-    results = response.json()['users']
+    results = response.json()["users"]
     if results:
         return True
     return False
@@ -30,7 +31,7 @@ def check_for_account_by_username(username: str) -> bool:
 
     query_string = urlencode(dict(q=f"username:{username}", page=1, size=1))
     url = url + "?" + query_string
     response = requests.get(url, headers=saturn_headers)
-    results = response.json()['users']
+    results = response.json()["users"]
     if results:
         return True
@@ -38,7 +39,7 @@ def check_for_account_by_username(username: str) -> bool:
 
 
 def make_unique_username(email: str) -> str:
-    candidate_username = email.split('@')[0]
+    candidate_username = email.split("@")[0]
     candidate_username = "".join(c for c in candidate_username if c.isalnum())
 
     # we'll try 100 integers until we get a unique name
@@ -48,7 +49,7 @@ def make_unique_username(email: str) -> str:
         to_try = candidate_username + str(c)
         if not check_for_account_by_username(to_try):
             return to_try
-    raise ValueError(f'unable to find username for {candidate_username}')
+    raise ValueError(f"unable to find username for {candidate_username}")
 
 
 def make_account(username: str, email: str):
@@ -73,7 +74,7 @@ def ensure_account_exists(email: str) -> None:
 
 
 def run():
-    for email in EMAILS_FOR_ACCOUNTS.split('\n'):
+    for email in EMAILS_FOR_ACCOUNTS.split("\n"):
         if email:
             ensure_account_exists(email)
diff --git a/examples/snowflake-ml/training.py b/examples/snowflake-ml/training.py
index 4af0268f..e9bf2f6f 100644
--- a/examples/snowflake-ml/training.py
+++ b/examples/snowflake-ml/training.py
@@ -19,7 +19,6 @@
 
 def simple_train_single(batch_size, downsample_to, n_epochs, base_lr, conn_kwargs):
-
     # --------- Format params --------- #
     device = torch.device("cuda")
     net = models.resnet50(pretrained=False)  # True means we start with the imagenet version
@@ -111,7 +110,6 @@ def simple_train_single(batch_size, downsample_to, n_epochs, base_lr, conn_kwarg
 
 
 if __name__ == "__main__":
-
     conn_kwargs = dict(
         user=os.environ["SNOWFLAKE_USER"],
         password=os.environ["SNOWFLAKE_PASSWORD"],
diff --git a/examples/tensorflow-comparison/comparison-training-multigpu.ipynb b/examples/tensorflow-comparison/comparison-training-multigpu.ipynb
index 982e7a9a..d56ba1af 100644
--- a/examples/tensorflow-comparison/comparison-training-multigpu.ipynb
+++ b/examples/tensorflow-comparison/comparison-training-multigpu.ipynb
@@ -124,7 +124,6 @@
    "def train_multigpu(\n",
    "    n_epochs, classes, base_lr, batchsize, wbargs, scale_batch=False, scale_lr=False\n",
    "):\n",
-   "\n",
    "    wbargs = {**wbargs, \"scale_batch\": scale_batch, \"scale_lr\": scale_lr}\n",
    "\n",
    "    # --------- Start wandb --------- #\n",
diff --git a/examples/tensorflow-comparison/comparison-training-singlenode.ipynb b/examples/tensorflow-comparison/comparison-training-singlenode.ipynb
index 810fff8d..56e7311f 100644
--- a/examples/tensorflow-comparison/comparison-training-singlenode.ipynb
+++ b/examples/tensorflow-comparison/comparison-training-singlenode.ipynb
@@ -126,7 +126,6 @@
    "outputs": [],
    "source": [
     "def train_model_fit(n_epochs, base_lr, batchsize, classes):\n",
-    "\n",
     "    model = tf.keras.applications.ResNet50(include_top=True, weights=None, classes=classes)\n",
     "\n",
     "    # --------- Start wandb --------- #\n",
diff --git a/examples/tensorflow-multigpu/multi-gpu-tensorflow.ipynb b/examples/tensorflow-multigpu/multi-gpu-tensorflow.ipynb
index 146cfbd0..f254a880 100644
--- a/examples/tensorflow-multigpu/multi-gpu-tensorflow.ipynb
+++ b/examples/tensorflow-multigpu/multi-gpu-tensorflow.ipynb
@@ -87,7 +87,6 @@
    "outputs": [],
    "source": [
     "def train_multigpu(n_epochs, classes, base_lr, batchsize, scale_batch=False, scale_lr=False):\n",
-    "\n",
     "    strategy = tf.distribute.MirroredStrategy()\n",
     "    print(\"Number of devices: %d\" % strategy.num_replicas_in_sync)\n",
     "\n",
diff --git a/examples/tensorflow/single-gpu-tensorflow.ipynb b/examples/tensorflow/single-gpu-tensorflow.ipynb
index 1672d25d..975b88d5 100644
--- a/examples/tensorflow/single-gpu-tensorflow.ipynb
+++ b/examples/tensorflow/single-gpu-tensorflow.ipynb
@@ -89,7 +89,6 @@
    "outputs": [],
    "source": [
     "def train_model_fit(n_epochs, base_lr, batchsize, classes):\n",
-    "\n",
     "    model = tf.keras.applications.ResNet50(include_top=True, weights=None, classes=classes)\n",
     "\n",
     "    # Data\n",
diff --git a/examples/wandb/wandb.ipynb b/examples/wandb/wandb.ipynb
index f75939e9..3d848681 100644
--- a/examples/wandb/wandb.ipynb
+++ b/examples/wandb/wandb.ipynb
@@ -164,7 +164,6 @@
    "def simple_train_single(\n",
    "    bucket, prefix, batch_size, downsample_to, n_epochs, base_lr, pretrained_classes\n",
    "):\n",
-   "\n",
    "    # --------- Format params --------- #\n",
    "    device = torch.device(\"cuda\")\n",
    "    net = models.resnet50(pretrained=True)  # True means we start with the imagenet version\n",