diff --git a/references/classification/train.py b/references/classification/train.py
index d52124fcf33..943bd651986 100644
--- a/references/classification/train.py
+++ b/references/classification/train.py
@@ -26,7 +26,7 @@ def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, arg
     for i, (image, target) in enumerate(metric_logger.log_every(data_loader, args.print_freq, header)):
         start_time = time.time()
         image, target = image.to(device), target.to(device)
-        with torch.cuda.amp.autocast(enabled=scaler is not None):
+        with torch.amp.autocast("cuda", enabled=scaler is not None):
             output = model(image)
             loss = criterion(output, target)

diff --git a/references/depth/stereo/cascade_evaluation.py b/references/depth/stereo/cascade_evaluation.py
index 7cb6413f1a5..28af2044c1a 100644
--- a/references/depth/stereo/cascade_evaluation.py
+++ b/references/depth/stereo/cascade_evaluation.py
@@ -139,7 +139,7 @@ def _evaluate(
         logger.add_meter("fl-all", fmt="{global_avg:.4f}")

     num_processed_samples = 0
-    with torch.cuda.amp.autocast(enabled=args.mixed_precision, dtype=torch.float16):
+    with torch.amp.autocast("cuda", enabled=args.mixed_precision, dtype=torch.float16):
         batch_idx = 0
         for blob in metric_logger.log_every(val_loader, print_freq, header):
             image_left, image_right, disp_gt, valid_disp_mask = (x.to(device) for x in blob)
diff --git a/references/depth/stereo/train.py b/references/depth/stereo/train.py
index e3d572153b2..5cf57150a69 100644
--- a/references/depth/stereo/train.py
+++ b/references/depth/stereo/train.py
@@ -169,7 +169,7 @@ def _evaluate(
         logger.add_meter("fl-all", fmt="{global_avg:.4f}")

     num_processed_samples = 0
-    with torch.cuda.amp.autocast(enabled=args.mixed_precision, dtype=torch.float16):
+    with torch.amp.autocast("cuda", enabled=args.mixed_precision, dtype=torch.float16):
         for blob in metric_logger.log_every(val_loader, print_freq, header):
             image_left, image_right, disp_gt, valid_disp_mask = (x.to(device) for x in blob)
             padder = utils.InputPadder(image_left.shape, mode=padder_mode)
@@ -314,7 +314,7 @@ def run(model, optimizer, scheduler, train_loader, val_loaders, logger, writer,
         # unpack the data blob
         image_left, image_right, disp_mask, valid_disp_mask = (x.to(device) for x in data_blob)

-        with torch.cuda.amp.autocast(enabled=args.mixed_precision, dtype=torch.float16):
+        with torch.amp.autocast("cuda", enabled=args.mixed_precision, dtype=torch.float16):
             disp_predictions = model(image_left, image_right, flow_init=None, num_iters=args.recurrent_updates)
             # different models have different outputs, make sure we get the right ones for this task
             disp_predictions = make_stereo_flow(disp_predictions, model_out_channels)
diff --git a/references/detection/engine.py b/references/detection/engine.py
index 0e9bfffdf8a..4e3a19b3aab 100644
--- a/references/detection/engine.py
+++ b/references/detection/engine.py
@@ -27,7 +27,7 @@ def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, sc
     for images, targets in metric_logger.log_every(data_loader, print_freq, header):
         images = list(image.to(device) for image in images)
         targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]
-        with torch.cuda.amp.autocast(enabled=scaler is not None):
+        with torch.amp.autocast("cuda", enabled=scaler is not None):
             loss_dict = model(images, targets)
             losses = sum(loss for loss in loss_dict.values())

diff --git a/references/segmentation/train.py b/references/segmentation/train.py
index abdc3c6aacb..72591091e62 100644
--- a/references/segmentation/train.py
+++ b/references/segmentation/train.py
@@ -107,7 +107,7 @@ def train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, devi
     header = f"Epoch: [{epoch}]"
     for image, target in metric_logger.log_every(data_loader, print_freq, header):
         image, target = image.to(device), target.to(device)
-        with torch.cuda.amp.autocast(enabled=scaler is not None):
+        with torch.amp.autocast("cuda", enabled=scaler is not None):
             output = model(image)
             loss = criterion(output, target)

diff --git a/references/video_classification/train.py b/references/video_classification/train.py
index a03a9722003..36e5d0ad3c1 100644
--- a/references/video_classification/train.py
+++ b/references/video_classification/train.py
@@ -25,7 +25,7 @@ def train_one_epoch(model, criterion, optimizer, lr_scheduler, data_loader, devi
     for video, target, _ in metric_logger.log_every(data_loader, print_freq, header):
         start_time = time.time()
         video, target = video.to(device), target.to(device)
-        with torch.cuda.amp.autocast(enabled=scaler is not None):
+        with torch.amp.autocast("cuda", enabled=scaler is not None):
             output = model(video)
             loss = criterion(output, target)

diff --git a/test/test_models.py b/test/test_models.py
index 202bbdbd0cd..f774d34585b 100644
--- a/test/test_models.py
+++ b/test/test_models.py
@@ -606,7 +606,7 @@ def checkOut(out):

     checkOut(out)

-    with torch.cuda.amp.autocast():
+    with torch.amp.autocast("cuda"):
         out = model(model_input)

         checkOut(out)
@@ -705,7 +705,7 @@ def test_classification_model(model_fn, dev):
     _check_fx_compatible(model, x, eager_out=out)

     if dev == "cuda":
-        with torch.cuda.amp.autocast():
+        with torch.amp.autocast("cuda"):
             out = model(x)
             # See autocast_flaky_numerics comment at top of file.
             if model_name not in autocast_flaky_numerics:
@@ -761,7 +761,7 @@ def check_out(out):
     _check_fx_compatible(model, x, eager_out=out)

     if dev == "cuda":
-        with torch.cuda.amp.autocast(), torch.no_grad(), freeze_rng_state():
+        with torch.amp.autocast("cuda"), torch.no_grad(), freeze_rng_state():
             out = model(x)
             # See autocast_flaky_numerics comment at top of file.
             if model_name not in autocast_flaky_numerics:
@@ -864,7 +864,7 @@ def compute_mean_std(tensor):
     _check_jit_scriptable(model, ([x],), unwrapper=script_model_unwrapper.get(model_name, None), eager_out=out)

     if dev == "cuda":
-        with torch.cuda.amp.autocast(), torch.no_grad(), freeze_rng_state():
+        with torch.amp.autocast("cuda"), torch.no_grad(), freeze_rng_state():
             out = model(model_input)
             # See autocast_flaky_numerics comment at top of file.
             if model_name not in autocast_flaky_numerics:
@@ -941,7 +941,7 @@ def test_video_model(model_fn, dev):
     assert out.shape[-1] == num_classes

     if dev == "cuda":
-        with torch.cuda.amp.autocast():
+        with torch.amp.autocast("cuda"):
             out = model(x)
             # See autocast_flaky_numerics comment at top of file.
             if model_name not in autocast_flaky_numerics:
diff --git a/test/test_ops.py b/test/test_ops.py
index 1ba7a2c9efa..97f5237a00c 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -232,7 +232,7 @@ def func(z):
     @pytest.mark.parametrize("x_dtype", (torch.float, torch.half))
     @pytest.mark.parametrize("rois_dtype", (torch.float, torch.half))
     def test_autocast(self, x_dtype, rois_dtype):
-        with torch.cuda.amp.autocast():
+        with torch.amp.autocast("cuda"):
             self.test_forward(torch.device("cuda"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype)

     def _helper_boxes_shape(self, func):
@@ -497,7 +497,7 @@ def test_forward(self, device, contiguous, deterministic, aligned, x_dtype, rois
     @pytest.mark.parametrize("rois_dtype", (torch.float, torch.half))
     @pytest.mark.opcheck_only_one()
     def test_autocast(self, aligned, deterministic, x_dtype, rois_dtype):
-        with torch.cuda.amp.autocast():
+        with torch.amp.autocast("cuda"):
             self.test_forward(
                 torch.device("cuda"),
                 contiguous=False,
@@ -513,7 +513,7 @@ def test_autocast(self, aligned, deterministic, x_dtype, rois_dtype):
     @pytest.mark.parametrize("x_dtype", (torch.float, torch.bfloat16))
     @pytest.mark.parametrize("rois_dtype", (torch.float, torch.bfloat16))
     def test_autocast_cpu(self, aligned, deterministic, x_dtype, rois_dtype):
-        with torch.cpu.amp.autocast():
+        with torch.amp.autocast("cpu"):
             self.test_forward(
                 torch.device("cpu"),
                 contiguous=False,
@@ -856,14 +856,14 @@ def test_nms_gpu(self, iou, device, dtype=torch.float64):
     @pytest.mark.parametrize("dtype", (torch.float, torch.half))
     @pytest.mark.opcheck_only_one()
     def test_autocast(self, iou, dtype):
-        with torch.cuda.amp.autocast():
+        with torch.amp.autocast("cuda"):
             self.test_nms_gpu(iou=iou, dtype=dtype, device="cuda")

     @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
     @pytest.mark.parametrize("dtype", (torch.float, torch.bfloat16))
     def test_autocast_cpu(self, iou, dtype):
         boxes, scores = self._create_tensors_with_iou(1000, iou)
-        with torch.cpu.amp.autocast():
+        with torch.amp.autocast("cpu"):
             keep_ref_float = ops.nms(boxes.to(dtype).float(), scores.to(dtype).float(), iou)
             keep_dtype = ops.nms(boxes.to(dtype), scores.to(dtype), iou)
             torch.testing.assert_close(keep_ref_float, keep_dtype)
@@ -1193,7 +1193,7 @@ def test_compare_cpu_cuda_grads(self, contiguous):
     @pytest.mark.parametrize("dtype", (torch.float, torch.half))
     @pytest.mark.opcheck_only_one()
     def test_autocast(self, batch_sz, dtype):
-        with torch.cuda.amp.autocast():
+        with torch.amp.autocast("cuda"):
             self.test_forward(torch.device("cuda"), contiguous=False, batch_sz=batch_sz, dtype=dtype)

     def test_forward_scriptability(self):
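
Note: every hunk above replaces the deprecated device-specific context managers (torch.cuda.amp.autocast / torch.cpu.amp.autocast) with the device-agnostic torch.amp.autocast(device_type, ...). Below is a minimal sketch, not part of the diff, of the mixed-precision training-step pattern the reference scripts follow after this change. The names train_step, model, criterion, optimizer, image, and target are placeholders; when mixed precision is enabled, a gradient scaler such as torch.amp.GradScaler("cuda") (torch.cuda.amp.GradScaler on older releases) is assumed to be passed in as scaler.

import torch

# Hypothetical helper illustrating the device-agnostic autocast idiom
# used in the reference scripts above; not taken from the diff itself.
def train_step(model, criterion, optimizer, image, target, scaler=None, device_type="cuda"):
    # enabled=scaler is not None mirrors the idiom in the reference scripts
    with torch.amp.autocast(device_type, enabled=scaler is not None):
        output = model(image)
        loss = criterion(output, target)

    optimizer.zero_grad()
    if scaler is not None:
        scaler.scale(loss).backward()  # backward pass on the scaled loss
        scaler.step(optimizer)         # unscales gradients, then calls optimizer.step()
        scaler.update()                # adjust the loss scale for the next iteration
    else:
        loss.backward()
        optimizer.step()
    return loss.detach()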