-
Notifications
You must be signed in to change notification settings - Fork 243
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First release of training code (#11)
- Loading branch information
1 parent
fec6d0b
commit f41342d
Showing
350 changed files
with
64,112 additions
and
1,808 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,43 +1,82 @@ | ||
# Copyright 2020 Toyota Research Institute. All rights reserved. | ||
# Handy commands: | ||
# - `make docker-build`: builds DOCKER_IMAGE (default: `packnet-sfm:latest`) | ||
PROJECT ?= packnet-sfm | ||
WORKSPACE ?= /workspace/$(PROJECT) | ||
DOCKER_IMAGE ?= ${PROJECT}:latest | ||
|
||
DEPTH_TYPE ?= None | ||
CROP ?= None | ||
SAVE_OUTPUT ?= None | ||
|
||
PYTHON ?= python | ||
DOCKER_IMAGE ?= packnet-sfm:master-latest | ||
DOCKER_OPTS := --name packnet-sfm --rm -it \ | ||
SHMSIZE ?= 444G | ||
WANDB_MODE ?= run | ||
DOCKER_OPTS := \ | ||
--name ${PROJECT} \ | ||
--rm -it \ | ||
--shm-size=${SHMSIZE} \ | ||
-e AWS_DEFAULT_REGION \ | ||
-e AWS_ACCESS_KEY_ID \ | ||
-e AWS_SECRET_ACCESS_KEY \ | ||
-e WANDB_API_KEY \ | ||
-e WANDB_ENTITY \ | ||
-e WANDB_MODE \ | ||
-e HOST_HOSTNAME= \ | ||
-e OMP_NUM_THREADS=1 -e KMP_AFFINITY="granularity=fine,compact,1,0" \ | ||
-e OMPI_ALLOW_RUN_AS_ROOT=1 \ | ||
-e OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 \ | ||
-e NCCL_DEBUG=VERSION \ | ||
-e DISPLAY=${DISPLAY} \ | ||
-e XAUTHORITY \ | ||
-e NVIDIA_DRIVER_CAPABILITIES=all \ | ||
-v ~/.aws:/root/.aws \ | ||
-v /root/.ssh:/root/.ssh \ | ||
-v ~/.cache:/root/.cache \ | ||
-v /data:/data \ | ||
-v ${PWD}:/workspace/self-supervised-learning \ | ||
-v /tmp/.X11-unix/X0:/tmp/.X11-unix/X0 \ | ||
-v /mnt/fsx/:/mnt/fsx \ | ||
-v /dev/null:/dev/raw1394 \ | ||
-w /workspace/self-supervised-learning \ | ||
--shm-size=444G \ | ||
-v /tmp:/tmp \ | ||
-v /tmp/.X11-unix/X0:/tmp/.X11-unix/X0 \ | ||
-v /var/run/docker.sock:/var/run/docker.sock \ | ||
-v ${PWD}:${WORKSPACE} \ | ||
-w ${WORKSPACE} \ | ||
--privileged \ | ||
--ipc=host \ | ||
--network=host | ||
|
||
.PHONY: all clean docker-build | ||
NGPUS=$(shell nvidia-smi -L | wc -l) | ||
MPI_CMD=mpirun \ | ||
-allow-run-as-root \ | ||
-np ${NGPUS} \ | ||
-H localhost:${NGPUS} \ | ||
-x MASTER_ADDR=127.0.0.1 \ | ||
-x MASTER_PORT=23457 \ | ||
-x HOROVOD_TIMELINE \ | ||
-x OMP_NUM_THREADS=1 \ | ||
-x KMP_AFFINITY='granularity=fine,compact,1,0' \ | ||
-bind-to none -map-by slot -x NCCL_DEBUG=INFO -x NCCL_MIN_NRINGS=4 \ | ||
--report-bindings | ||
|
||
|
||
.PHONY: all clean docker-build docker-overfit-pose | ||
|
||
all: clean | ||
|
||
clean: | ||
find . -name "*.pyc" | xargs rm -f && \ | ||
find . -name "__pycache__" | xargs rm -rf | ||
|
||
|
||
docker-build: | ||
docker build \ | ||
-t ${DOCKER_IMAGE} . -f docker/Dockerfile | ||
-f docker/Dockerfile \ | ||
-t ${DOCKER_IMAGE} . | ||
|
||
docker-start-interactive: docker-build | ||
nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} bash | ||
|
||
# Build the image, then start a Jupyter notebook server inside the container
# on port 8888, bound to all interfaces (0.0.0.0), without opening a browser.
# NOTE: the option must be spelled `--ip`; the single-dash form `-ip=...` is
# not a documented Jupyter option.
docker-start-jupyter: docker-build
	nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \
		bash -c "jupyter notebook --port=8888 --ip=0.0.0.0 --allow-root --no-browser"
|
||
docker-evaluate-depth: docker-build | ||
docker-run: docker-build | ||
nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \ | ||
bash -c "bash scripts/evaluate_depth.sh ${MODEL} ${INPUT_PATH} ${DEPTH_TYPE} ${CROP} ${SAVE_OUTPUT}" | ||
bash -c "${COMMAND}" | ||
|
||
docker-run-mpi: docker-build | ||
nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \ | ||
bash -c "${MPI_CMD} ${COMMAND}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
"""Default packnet_sfm configuration parameters (overridable in configs/*.yaml) | ||
""" | ||
|
||
import os | ||
from yacs.config import CfgNode as CN | ||
|
||
######################################################################################################################## | ||
cfg = CN() | ||
cfg.name = '' # Run name | ||
cfg.debug = False # Debugging flag | ||
######################################################################################################################## | ||
### ARCH | ||
######################################################################################################################## | ||
cfg.arch = CN() | ||
cfg.arch.seed = 42 # Random seed for Pytorch/Numpy initialization | ||
cfg.arch.min_epochs = 1 # Minimum number of epochs | ||
cfg.arch.max_epochs = 50 # Maximum number of epochs | ||
######################################################################################################################## | ||
### CHECKPOINT | ||
######################################################################################################################## | ||
cfg.checkpoint = CN() | ||
cfg.checkpoint.filepath = '' # Checkpoint filepath to save data | ||
cfg.checkpoint.save_top_k = 5 # Number of best models to save | ||
cfg.checkpoint.monitor = 'loss' # Metric to monitor for logging | ||
cfg.checkpoint.monitor_index = 0 # Dataset index for the metric to monitor | ||
cfg.checkpoint.mode = 'auto' # Automatically determine direction of improvement (increase or decrease) | ||
cfg.checkpoint.s3_path = '' # s3 path for AWS model syncing | ||
cfg.checkpoint.s3_frequency = 1 # How often to s3 sync | ||
######################################################################################################################## | ||
### SAVE | ||
######################################################################################################################## | ||
cfg.save = CN() | ||
cfg.save.folder = '' # Folder where data will be saved | ||
cfg.save.viz = True # Flag for saving inverse depth map visualization | ||
cfg.save.npz = True # Flag for saving numpy depth maps | ||
######################################################################################################################## | ||
### WANDB | ||
######################################################################################################################## | ||
cfg.wandb = CN() | ||
cfg.wandb.dry_run = True # Wandb dry-run (not logging) | ||
cfg.wandb.name = '' # Wandb run name | ||
cfg.wandb.project = os.environ.get("WANDB_PROJECT", "") # Wandb project | ||
cfg.wandb.entity = os.environ.get("WANDB_ENTITY", "") # Wandb entity | ||
cfg.wandb.tags = [] # Wandb tags | ||
cfg.wandb.dir = '' # Wandb save folder | ||
######################################################################################################################## | ||
### MODEL | ||
######################################################################################################################## | ||
cfg.model = CN() | ||
cfg.model.name = '' # Training model | ||
cfg.model.checkpoint_path = '' # Checkpoint path for model saving | ||
######################################################################################################################## | ||
### MODEL.OPTIMIZER | ||
######################################################################################################################## | ||
cfg.model.optimizer = CN() | ||
cfg.model.optimizer.name = 'Adam' # Optimizer name | ||
cfg.model.optimizer.depth = CN() | ||
cfg.model.optimizer.depth.lr = 0.0002 # Depth learning rate | ||
cfg.model.optimizer.depth.weight_decay = 0.0 # Depth weight decay | ||
cfg.model.optimizer.pose = CN() | ||
cfg.model.optimizer.pose.lr = 0.0002 # Pose learning rate | ||
cfg.model.optimizer.pose.weight_decay = 0.0 # Pose weight decay | ||
######################################################################################################################## | ||
### MODEL.SCHEDULER | ||
######################################################################################################################## | ||
cfg.model.scheduler = CN() | ||
cfg.model.scheduler.name = 'StepLR' # Scheduler name | ||
cfg.model.scheduler.step_size = 10 # Scheduler step size | ||
cfg.model.scheduler.gamma = 0.5 # Scheduler gamma value | ||
cfg.model.scheduler.T_max = 20 # Scheduler maximum number of iterations | ||
######################################################################################################################## | ||
### MODEL.PARAMS | ||
######################################################################################################################## | ||
cfg.model.params = CN() | ||
cfg.model.params.crop = '' # Which crop should be used during evaluation | ||
cfg.model.params.min_depth = 0.0 # Minimum depth value to evaluate | ||
cfg.model.params.max_depth = 80.0 # Maximum depth value to evaluate | ||
######################################################################################################################## | ||
### MODEL.LOSS | ||
######################################################################################################################## | ||
cfg.model.loss = CN() | ||
# | ||
cfg.model.loss.num_scales = 4 # Number of inverse depth scales to use | ||
cfg.model.loss.progressive_scaling = 0.0 # Training percentage to decay number of scales | ||
cfg.model.loss.flip_lr_prob = 0.5 # Probability of horizontal flipping | ||
cfg.model.loss.rotation_mode = 'euler' # Rotation mode | ||
cfg.model.loss.upsample_depth_maps = True # Resize depth maps to highest resolution | ||
# | ||
cfg.model.loss.ssim_loss_weight = 0.85 # SSIM loss weight | ||
cfg.model.loss.occ_reg_weight = 0.1 # Occlusion regularizer loss weight | ||
cfg.model.loss.smooth_loss_weight = 0.001 # Smoothness loss weight | ||
cfg.model.loss.C1 = 1e-4 # SSIM parameter | ||
cfg.model.loss.C2 = 9e-4 # SSIM parameter | ||
cfg.model.loss.photometric_reduce_op = 'min' # Method for photometric loss reducing | ||
cfg.model.loss.disp_norm = True # Inverse depth normalization | ||
cfg.model.loss.clip_loss = 0.0 # Clip loss threshold variance | ||
cfg.model.loss.padding_mode = 'zeros' # Photometric loss padding mode | ||
cfg.model.loss.automask_loss = True # Automasking to remove static pixels | ||
# | ||
cfg.model.loss.supervised_method = 'sparse-l1' # Method for depth supervision | ||
cfg.model.loss.supervised_num_scales = 4 # Number of scales for supervised learning | ||
cfg.model.loss.supervised_loss_weight = 0.9 # Supervised loss weight | ||
######################################################################################################################## | ||
### MODEL.DEPTH_NET | ||
######################################################################################################################## | ||
cfg.model.depth_net = CN() | ||
cfg.model.depth_net.name = '' # Depth network name | ||
cfg.model.depth_net.checkpoint_path = '' # Depth checkpoint filepath | ||
cfg.model.depth_net.version = '' # Depth network version | ||
cfg.model.depth_net.dropout = 0.0 # Depth network dropout | ||
######################################################################################################################## | ||
### MODEL.POSE_NET | ||
######################################################################################################################## | ||
cfg.model.pose_net = CN() | ||
cfg.model.pose_net.name = '' # Pose network name | ||
cfg.model.pose_net.checkpoint_path = '' # Pose checkpoint filepath | ||
cfg.model.pose_net.version = '' # Pose network version | ||
cfg.model.pose_net.dropout = 0.0 # Pose network dropout | ||
######################################################################################################################## | ||
### DATASETS | ||
######################################################################################################################## | ||
cfg.datasets = CN() | ||
######################################################################################################################## | ||
### DATASETS.AUGMENTATION | ||
######################################################################################################################## | ||
cfg.datasets.augmentation = CN() | ||
cfg.datasets.augmentation.image_shape = (192, 640) # Image shape | ||
cfg.datasets.augmentation.jittering = (0.2, 0.2, 0.2, 0.05) # Color jittering values | ||
######################################################################################################################## | ||
### DATASETS.TRAIN | ||
######################################################################################################################## | ||
cfg.datasets.train = CN() | ||
cfg.datasets.train.batch_size = 8 # Training batch size | ||
cfg.datasets.train.num_workers = 16 # Training number of workers | ||
cfg.datasets.train.back_context = 1 # Training backward context | ||
cfg.datasets.train.forward_context = 1 # Training forward context | ||
cfg.datasets.train.dataset = [] # Training dataset | ||
cfg.datasets.train.path = [] # Training data path | ||
cfg.datasets.train.split = [] # Training split | ||
cfg.datasets.train.depth_type = [''] # Training depth type | ||
cfg.datasets.train.cameras = [] # Training cameras | ||
cfg.datasets.train.repeat = [1] # Number of times training dataset is repeated per epoch | ||
cfg.datasets.train.num_logs = 5 # Number of training images to log | ||
######################################################################################################################## | ||
### DATASETS.VALIDATION | ||
######################################################################################################################## | ||
cfg.datasets.validation = CN() | ||
cfg.datasets.validation.batch_size = 1 # Validation batch size | ||
cfg.datasets.validation.num_workers = 8 # Validation number of workers | ||
cfg.datasets.validation.back_context = 0 # Validation backward context | ||
cfg.datasets.validation.forward_context = 0 # Validation forward context | ||
cfg.datasets.validation.dataset = [] # Validation dataset | ||
cfg.datasets.validation.path = [] # Validation data path | ||
cfg.datasets.validation.split = [] # Validation split | ||
cfg.datasets.validation.depth_type = [''] # Validation depth type | ||
cfg.datasets.validation.cameras = [] # Validation cameras | ||
cfg.datasets.validation.num_logs = 5 # Number of validation images to log | ||
######################################################################################################################## | ||
### DATASETS.TEST | ||
######################################################################################################################## | ||
cfg.datasets.test = CN() | ||
cfg.datasets.test.batch_size = 1 # Test batch size | ||
cfg.datasets.test.num_workers = 8 # Test number of workers | ||
cfg.datasets.test.back_context = 0 # Test backward context | ||
cfg.datasets.test.forward_context = 0 # Test forward context | ||
cfg.datasets.test.dataset = [] # Test dataset | ||
cfg.datasets.test.path = [] # Test data path | ||
cfg.datasets.test.split = [] # Test split | ||
cfg.datasets.test.depth_type = [''] # Test depth type | ||
cfg.datasets.test.cameras = [] # Test cameras | ||
cfg.datasets.test.num_logs = 5 # Number of test images to log | ||
######################################################################################################################## | ||
### THESE SHOULD NOT BE CHANGED | ||
######################################################################################################################## | ||
cfg.config = '' # Run configuration file | ||
cfg.default = '' # Run default configuration file | ||
cfg.wandb.url = '' # Wandb URL | ||
cfg.checkpoint.s3_url = '' # s3 URL | ||
cfg.save.pretrained = '' # Pretrained checkpoint | ||
cfg.prepared = False # Prepared flag | ||
######################################################################################################################## | ||
|
||
def get_cfg_defaults():
    """Return a clone of the module-level default configuration.

    The clone is produced with ``cfg.clone()``, so callers receive their own
    node instead of the shared module-level ``cfg`` object.
    """
    defaults = cfg.clone()
    return defaults
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
model: | ||
name: 'SelfSupModel' | ||
depth_net: | ||
name: 'PackNet01' | ||
version: '1A' | ||
pose_net: | ||
name: 'PoseNet' | ||
version: '' | ||
params: | ||
crop: '' | ||
min_depth: 0.0 | ||
max_depth: 200.0 | ||
datasets: | ||
augmentation: | ||
image_shape: (384, 640) | ||
test: | ||
dataset: ['DGP'] | ||
path: ['/data/datasets/DDAD/ddad.json'] | ||
split: ['val'] | ||
depth_type: ['lidar'] | ||
cameras: ['camera_01'] | ||
save: | ||
folder: '/data/save' | ||
viz: True | ||
npz: True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Test configuration using the 'Image' dataset with a PackNet01 depth network.
model:
    name: 'SelfSupModel'
    depth_net:
        name: 'PackNet01'
        version: '1A'
    pose_net:
        name: 'PoseNet'
        version: ''
datasets:
    augmentation:
        image_shape: (384, 640)
    test:
        dataset: ['Image']
        path: ['images']
        split: ['{:010d}']
save:
    folder: '/data/save'
    viz: True
    # The default configuration declares `save.npz` (there is no `save.npy`),
    # so the key must be `npz` for the override to be accepted.
    npz: True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
model: | ||
name: 'SelfSupModel' | ||
depth_net: | ||
name: 'PackNet01' | ||
version: '1A' | ||
pose_net: | ||
name: 'PoseNet' | ||
version: '' | ||
params: | ||
crop: 'garg' | ||
min_depth: 0.0 | ||
max_depth: 80.0 | ||
datasets: | ||
augmentation: | ||
image_shape: (192, 640) | ||
test: | ||
dataset: ['KITTI'] | ||
path: ['/data/datasets/KITTI_raw'] | ||
split: ['data_splits/eigen_test_files.txt'] | ||
depth_type: ['velodyne'] | ||
save: | ||
folder: '/data/save' | ||
viz: True | ||
npz: True |
Oops, something went wrong.