Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix to allow GPU allocations, ports & more arguments to launch_shell #9

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion doodad/launch_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ def launch_shell(
mode=LOCAL,
dry=False,
mount_points=None,
port=None,
root=False
):
if mount_points is None:
mount_points = []
mode.launch_command(command, dry=dry)
mode.launch_command(command, mount_points=mount_points, dry=dry, port=port, root=root)


def launch_python(
Expand Down
57 changes: 27 additions & 30 deletions doodad/mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,23 @@ def launch_command(self, cmd, mount_points=None, dry=False, verbose=False):


class DockerMode(LaunchMode):
def __init__(self, image='ubuntu:16.04', gpu=False, visible_gpu_devices=None):
    """Configure a Docker launch mode.

    Args:
        image: Name of the Docker image to run.
        gpu: Whether the container should be started with GPU support.
        visible_gpu_devices: Iterable of GPU device ids to expose to the
            container. Must be non-empty when ``gpu`` is true. Defaults to
            no devices.

    Raises:
        AssertionError: If ``gpu`` is true but no visible GPU devices
            were given.
    """
    super(DockerMode, self).__init__()
    self.docker_image = image
    self.docker_name = uuid.uuid4()

    # Assign the attributes *before* validating them: the check below reads
    # self.gpu / self.visible_gpu_devices, which would otherwise not exist
    # yet and raise AttributeError on every construction.
    self.gpu = gpu
    # Use None as the default and copy the input so instances never share
    # (or mutate) a single default list or the caller's list.
    self.visible_gpu_devices = (
        [] if visible_gpu_devices is None else list(visible_gpu_devices)
    )
    assert not self.gpu or len(self.visible_gpu_devices) > 0, \
        'gpu=True requires at least one entry in visible_gpu_devices'

def get_docker_cmd(self, main_cmd, extra_args='', use_tty=True, verbose=True, pythonpath=None, pre_cmd=None, post_cmd=None,
checkpoint=False, no_root=False):
checkpoint=False, no_root=False, port=None):
cmd_list= CommandBuilder()
if pre_cmd:
cmd_list.extend(pre_cmd)

if verbose:
if self.gpu:
if self.gpu and len(self.visible_gpu_devices) > 0:
cmd_list.append('echo \"Running in docker (gpu)\"')
else:
cmd_list.append('echo \"Running in docker\"')
Expand Down Expand Up @@ -113,8 +116,10 @@ def get_docker_cmd(self, main_cmd, extra_args='', use_tty=True, verbose=True, py
docker_prefix = 'docker run %s -ti %s /bin/bash -c ' % (extra_args, self.docker_image)
else:
docker_prefix = 'docker run %s %s /bin/bash -c ' % (extra_args, self.docker_image)
if self.gpu:
if self.gpu and len(self.visible_gpu_devices) > 0:
docker_prefix = 'nvidia-'+docker_prefix
docker_prefix = 'NV_GPU=\'%s\' ' % ','.join(map(str, self.visible_gpu_devices)) + docker_prefix

main_cmd = cmd_list.to_string()
full_cmd = docker_prefix + ("\'%s\'" % main_cmd)
return full_cmd
Expand All @@ -125,7 +130,7 @@ def __init__(self, checkpoints=None, **kwargs):
super(LocalDocker, self).__init__(**kwargs)
self.checkpoints = checkpoints

def launch_command(self, cmd, mount_points=None, dry=False, verbose=False):
def launch_command(self, cmd, mount_points=None, dry=False, verbose=False, port=None, root=False, use_tty=True):
mnt_args = ''
py_path = []
for mount in mount_points:
Expand All @@ -139,8 +144,11 @@ def launch_command(self, cmd, mount_points=None, dry=False, verbose=False):
else:
raise NotImplementedError(type(mount))

full_cmd = self.get_docker_cmd(cmd, extra_args=mnt_args, pythonpath=py_path,
checkpoint=self.checkpoints)
if port is not None:
mnt_args += ' -p %s:%s' % (port, port)

full_cmd = self.get_docker_cmd(cmd, extra_args=mnt_args, pythonpath=py_path, use_tty=use_tty,
checkpoint=self.checkpoints, port=port, no_root=not(root))
if verbose:
print(full_cmd)
call_and_wait(full_cmd, dry=dry)
Expand Down Expand Up @@ -221,7 +229,6 @@ class EC2SpotDocker(DockerMode):
def __init__(self,
credentials,
region='us-west-1',
s3_bucket_region='us-west-1',
instance_type='m1.small',
spot_price=0.0,
s3_bucket=None,
Expand All @@ -234,7 +241,6 @@ def __init__(self,
security_group_ids=None,
security_groups=None,
aws_s3_path=None,
extra_ec2_instance_kwargs=None,
**kwargs
):
super(EC2SpotDocker, self).__init__(**kwargs)
Expand All @@ -244,7 +250,6 @@ def __init__(self,
security_groups = []
self.credentials = credentials
self.region = region
self.s3_bucket_region = s3_bucket_region
self.spot_price = str(float(spot_price))
self.instance_type = instance_type
self.terminate = terminate
Expand All @@ -256,7 +261,6 @@ def __init__(self,
self.security_group_ids = security_group_ids
self.security_groups = security_groups
self.iam_instance_profile_name = iam_instance_profile_name
self.extra_ec2_instance_kwargs = extra_ec2_instance_kwargs
self.checkpoint = None

self.s3_mount_path = 's3://%s/doodad/mount' % self.s3_bucket
Expand All @@ -268,7 +272,7 @@ def upload_file_to_s3(self, script_content, dry=False):
f.close()
remote_path = os.path.join(self.s3_mount_path, 'oversize_bash_scripts', str(uuid.uuid4()))
subprocess.check_call(["aws", "s3", "cp", f.name, remote_path,
'--region', self.s3_bucket_region])
'--region', self.region])
os.unlink(f.name)
return remote_path

Expand All @@ -277,11 +281,10 @@ def s3_upload(self, file_name, bucket, remote_filename=None, dry=False, check_ex
remote_filename = os.path.basename(file_name)
remote_path = 'doodad/mount/'+remote_filename
if check_exist:
if s3_exists(bucket, remote_path, region=self.s3_bucket_region):
if s3_exists(bucket, remote_path, region=self.region):
print('\t%s exists! ' % os.path.join(bucket, remote_path))
return 's3://'+os.path.join(bucket, remote_path)
return s3_upload(file_name, bucket, remote_path, dry=dry,
region=self.s3_bucket_region)
return s3_upload(file_name, bucket, remote_path, dry=dry, region=self.region)

def make_timekey(self):
return '%d'%(int(time.time()*1000))
Expand All @@ -307,7 +310,6 @@ def launch_command(self, main_cmd, mount_points=None, dry=False, verbose=False):

sio = StringIO()
sio.write("#!/bin/bash\n")
sio.write("truncate -s 0 /home/ubuntu/user_data.log\n")
sio.write("{\n")
sio.write('die() { status=$1; shift; echo "FATAL: $*"; exit $status; }\n')
sio.write('EC2_INSTANCE_ID="`wget -q -O - http://169.254.169.254/latest/meta-data/instance-id`"\n')
Expand All @@ -319,7 +321,7 @@ def launch_command(self, main_cmd, mount_points=None, dry=False, verbose=False):
""".format(exp_prefix=exp_prefix, aws_region=self.region))
sio.write("service docker start\n")
sio.write("docker --config /home/ubuntu/.docker pull {docker_image}\n".format(docker_image=self.docker_image))
sio.write("export AWS_DEFAULT_REGION={aws_region}\n".format(aws_region=self.s3_bucket_region))
sio.write("export AWS_DEFAULT_REGION={aws_region}\n".format(aws_region=self.region))
sio.write("""
curl "https://s3.amazonaws.com/aws-cli/awscli-bundle.zip" -o "awscli-bundle.zip"
unzip awscli-bundle.zip
Expand Down Expand Up @@ -423,7 +425,7 @@ def launch_command(self, main_cmd, mount_points=None, dry=False, verbose=False):
sio.write("aws ec2 create-tags --resources $EC2_INSTANCE_ID --tags Key=Name,Value={exp_name} --region {aws_region}\n".format(
exp_name=exp_name, aws_region=self.region))

if self.gpu:
if self.gpu and len(self.visible_gpu_devices) > 0:
#sio.write('echo "LSMOD NVIDIA:"\n')
#sio.write("lsmod | grep nvidia\n")
#sio.write("echo 'Waiting for dpkg lock...'\n")
Expand Down Expand Up @@ -491,7 +493,7 @@ def launch_command(self, main_cmd, mount_points=None, dry=False, verbose=False):
aws s3 cp {s3_path} /home/ubuntu/remote_script.sh --region {aws_region} && \\
chmod +x /home/ubuntu/remote_script.sh && \\
bash /home/ubuntu/remote_script.sh
""".format(s3_path=s3_path, aws_region=self.s3_bucket_region))
""".format(s3_path=s3_path, aws_region=self.region))
user_data = dedent(sio.getvalue())
else:
user_data = full_script
Expand All @@ -515,8 +517,6 @@ def launch_command(self, main_cmd, mount_points=None, dry=False, verbose=False):
),
#**config.AWS_EXTRA_CONFIGS,
)
if self.extra_ec2_instance_kwargs is not None:
instance_args.update(self.extra_ec2_instance_kwargs)

if verbose:
print("************************************************************")
Expand Down Expand Up @@ -560,30 +560,27 @@ def __init__(self,
region='us-west-1',
s3_bucket=None,
image_id=None,
aws_key_name=None,
iam_profile=None,
**kwargs
):
# find config file
from doodad.ec2.autoconfig import AUTOCONFIG
from doodad.ec2.credentials import AWSCredentials
s3_bucket = AUTOCONFIG.s3_bucket() if s3_bucket is None else s3_bucket
image_id = AUTOCONFIG.aws_image_id(region) if image_id is None else image_id
aws_key_name= AUTOCONFIG.aws_key_name(region) if aws_key_name is None else aws_key_name
iam_profile= AUTOCONFIG.iam_profile_name() if iam_profile is None else iam_profile
image_id = image_id or AUTOCONFIG.aws_image_id(region)
aws_key_name= AUTOCONFIG.aws_key_name(region)
iam_profile= AUTOCONFIG.iam_profile_name()
credentials=AWSCredentials(aws_key=AUTOCONFIG.aws_access_key(), aws_secret=AUTOCONFIG.aws_access_secret())
security_group_ids = AUTOCONFIG.aws_security_group_ids()[region]
security_group_ids = [AUTOCONFIG.aws_security_group_ids()[region]]
security_groups = AUTOCONFIG.aws_security_groups()

super(EC2AutoconfigDocker, self).__init__(
s3_bucket=s3_bucket,
image_id=image_id,
aws_key_name=aws_key_name,
iam_instance_profile_name=iam_profile,
credentials=credentials,
region=region,
security_groups=security_groups,
security_group_ids=security_group_ids,
#security_groups=security_groups,
#security_group_ids=security_group_ids,
**kwargs
)

Expand Down
2 changes: 1 addition & 1 deletion doodad/mount.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def __init__(self, git_url, git_credentials=None, **kwargs):

class MountS3(Mount):
def __init__(self, s3_path, s3_bucket=None, sync_interval=15, output=False,
include_types=('*.txt', '*.csv', '*.json', '*.gz', '*.tar', '*.log', '*.pkl'), **kwargs):
include_types=('*.txt', '*.npy', '*.png', '*.csv', '*.json', '*.gz', '*.tar', '*.log', '*.pkl'), **kwargs):
super(MountS3, self).__init__(**kwargs)
if s3_bucket is None:
# load from config
Expand Down