diff --git a/backend/copr_backend/background_worker_build.py b/backend/copr_backend/background_worker_build.py
index 0eee39ab8..71ae026b5 100644
--- a/backend/copr_backend/background_worker_build.py
+++ b/backend/copr_backend/background_worker_build.py
@@ -10,7 +10,9 @@
 import statistics
 import time
 import json
+import shlex
+from datetime import datetime
 
 from packaging import version
 from copr_common.enums import StatusEnum
@@ -38,6 +40,11 @@ MIN_BUILDER_VERSION = "0.68.dev"
 
 CANCEL_CHECK_PERIOD = 5
 
+# See also copr-builder script
+USER_SSH_DEFAULT_EXPIRATION = 60 * 60
+USER_SSH_MAX_EXPIRATION = 60 * 60 * 24 * 2
+USER_SSH_EXPIRATION_PATH = "/run/copr-builder-expiration"
+
 MESSAGES = {
     "give_up_repo": "Giving up waiting for copr_base repository, "
@@ -51,6 +58,9 @@ COMMANDS = {
     "rpm_q_builder": "rpm -q copr-rpmbuild --qf \"%{VERSION}\n\"",
+    "echo_authorized_keys": "echo {0} >> /root/.ssh/authorized_keys",
+    "set_expiration": "echo -n {0} > " + USER_SSH_EXPIRATION_PATH,
+    "cat_expiration": "cat {0}".format(USER_SSH_EXPIRATION_PATH),
 }
 
@@ -139,6 +149,7 @@ def __init__(self):
         self.builder_livelog = os.path.join(self.builder_dir, "main.log")
         self.builder_results = os.path.join(self.builder_dir, "results")
         self.ssh = None
+        self.root_ssh = None
         self.job = None
         self.host = None
         self.canceled = False
@@ -589,12 +600,23 @@ def _download_results(self):
         """
         Retry rsync-download the results several times.
         """
+        include = None
+        exclude = None
+
+        if self.job.allow_user_ssh:
+            self.log.info("Builder allowed user SSH, not downloading the "
+                          "results for safety reasons.")
+            include = ["success", "*.spec"]
+            exclude = ["*"]
+
         self.log.info("Downloading results from builder")
         self.ssh.rsync_download(
             self.builder_results + "/",
             self.job.results_dir,
             logfile=self.job.rsync_log_name,
             max_retries=2,
+            include=include,
+            exclude=exclude,
         )
 
     def _check_build_success(self):
@@ -740,6 +762,113 @@ def _add_pubkey(self):
         self.log.info("Added pubkey for user %s project %s into: %s",
                       user, project, pubkey_path)
 
+    @skipped_for_source_build
+    def _setup_for_user_ssh(self):
+        """
+        Setup the builder for user SSH
+        https://github.com/fedora-copr/debate/tree/main/user-ssh-builders
+
+        If the builder setup for user SSH becomes more complicated than just
+        installing the public key, we might want to move the code to a script
+        within `copr-builder` and call it here or from `copr-rpmbuild`. There
+        is no requirement for it to be here.
+        """
+        if not self.job.allow_user_ssh:
+            return
+        self._alloc_root_ssh_connection()
+        self._deploy_user_ssh()
+        self._set_default_expiration()
+
+    def _alloc_root_ssh_connection(self):
+        self.log.info("Allocating root ssh connection to builder")
+        self.root_ssh = SSHConnection(
+            user="root",
+            host=self.host.hostname,
+            config_file=self.opts.ssh.builder_config,
+            log=self.log,
+        )
+
+    def _deploy_user_ssh(self):
+        """
+        Deploy the user's public key to the builder so that they can
+        connect via SSH.
+ """ + pubkey = shlex.quote(self.job.ssh_public_key) + cmd = COMMANDS["echo_authorized_keys"].format(pubkey) + rc, _out, _err = self.root_ssh.run_expensive(cmd) + if rc != 0: + self.log.error("Failed to deploy user SSH key for %s", + self.job.project_owner) + return + self.log.info("Deployed user SSH key for %s", self.job.project_owner) + + def _set_default_expiration(self): + """ + Set the default expiration time for the builder + """ + default = self.job.started_on + USER_SSH_DEFAULT_EXPIRATION + cmd = COMMANDS["set_expiration"].format(shlex.quote(str(default))) + rc, _out, _err = self.root_ssh.run_expensive(cmd) + if rc != 0: + # This only affects the `copr-builder show` command to print unknown + # remaining time. It won't affect the backend in terminating the + # buidler when it is supposed to + self.log.error("Failed to set the default expiration time") + return + self.log.info("The expiration time was set to %s", default) + + def _builder_expiration(self): + """ + Find the user preference for the builder expiration. + """ + rc, out, _err = self.root_ssh.run_expensive(COMMANDS["cat_expiration"]) + if rc == 0: + try: + return datetime.fromtimestamp(float(out)) + except ValueError: + pass + self.log.error("Unable to query builder expiration file") + return None + + def _keep_alive_for_user_ssh(self): + """ + Wait until user releases the VM or until it expires. + """ + if not self.job.allow_user_ssh: + return + + # We are calculating the limits from when the job started but we may + # want to consider starting the watch when job ends. + default = datetime.fromtimestamp( + self.job.started_on + USER_SSH_DEFAULT_EXPIRATION) + maxlimit = datetime.fromtimestamp( + self.job.started_on + USER_SSH_MAX_EXPIRATION) + + self.log.info("Keeping builder alive for user SSH") + + def _keep_alive(): + while True: + if self.canceled: + self.log.warning("Build canceled, VM will be shut-down soon") + break + expiration = self._builder_expiration() or default + if datetime.now() > expiration: + self.log.warning("VM expired, it will be shut-down soon") + break + if datetime.now() > maxlimit: + msg = "VM exceeded max limit, it will be shut-down soon" + self.log.warning(msg) + break + time.sleep(60) + + CancellableThreadTask( + _keep_alive, + self._cancel_task_check_request, + self._cancel_running_worker, + check_period=CANCEL_CHECK_PERIOD, + ).run() + if self.canceled: + raise BuildCanceled + def build(self, attempt): """ Attempt to build. 
@@ -754,6 +883,7 @@ def build(self, attempt):
         self._fill_build_info_file()
         self._cancel_if_requested()
         self._mark_running(attempt)
+        self._setup_for_user_ssh()
         self._start_remote_build()
         transfer_failure = CancellableThreadTask(
             self._transfer_log_file,
@@ -766,6 +896,8 @@ def build(self, attempt):
         if transfer_failure:
             raise BuildRetry("SSH problems when downloading live log: {}"
                              .format(transfer_failure))
+
+        self._keep_alive_for_user_ssh()
         self._download_results()
         self._drop_host()
 
diff --git a/backend/copr_backend/daemons/build_dispatcher.py b/backend/copr_backend/daemons/build_dispatcher.py
index c2f05bebc..98a9dba2e 100644
--- a/backend/copr_backend/daemons/build_dispatcher.py
+++ b/backend/copr_backend/daemons/build_dispatcher.py
@@ -8,6 +8,7 @@
     ArchitectureWorkerLimit,
     ArchitectureUserWorkerLimit,
     BuildTagLimit,
+    UserSSHLimit,
     RPMBuildWorkerManager,
     BuildQueueTask,
 )
@@ -105,6 +106,14 @@ def __init__(self, backend_opts):
                 name=limit_type,
             ))
 
+        userssh = UserSSHLimit(
+            None,
+            # lambda x: getattr(x, "userssh"),
+            # "foo",
+            backend_opts.builds_limits["userssh"])
+        self.limits.append(userssh)
+
+
     def get_frontend_tasks(self):
         """
         Retrieve a list of build jobs to be done.
diff --git a/backend/copr_backend/helpers.py b/backend/copr_backend/helpers.py
index 75fa5e62d..bad80fbed 100644
--- a/backend/copr_backend/helpers.py
+++ b/backend/copr_backend/helpers.py
@@ -240,6 +240,8 @@ def _get_limits_conf(parser):
         parser, "backend", "builds_max_workers_sandbox", 10, mode="int")
     limits['owner'] = _get_conf(
         parser, "backend", "builds_max_workers_owner", 20, mode="int")
+    limits['userssh'] = _get_conf(
+        parser, "backend", "builds_max_userssh", 2, mode="int")
 
     return limits
 
diff --git a/backend/copr_backend/job.py b/backend/copr_backend/job.py
index 76046852a..9d7e6a8ac 100644
--- a/backend/copr_backend/job.py
+++ b/backend/copr_backend/job.py
@@ -27,6 +27,7 @@ def __init__(self, task_data, worker_opts):
         - timeout: default worker timeout
         """
+        # pylint: disable=too-many-statements
         self.timeout = worker_opts.timeout
         self.frontend_base_url = worker_opts.frontend_base_url
@@ -72,6 +73,8 @@ def __init__(self, task_data, worker_opts):
         self.results = None
         self.appstream = None
+        self.allow_user_ssh = None
+        self.ssh_public_key = None
 
         # TODO: validate update data
         for key, val in task_data.items():
diff --git a/backend/copr_backend/rpm_builds.py b/backend/copr_backend/rpm_builds.py
index af0ce5606..fd16c629e 100644
--- a/backend/copr_backend/rpm_builds.py
+++ b/backend/copr_backend/rpm_builds.py
@@ -2,6 +2,7 @@
 Abstraction for RPM and SRPM builds on backend.
 """
 
+import uuid
 from copr_common.worker_manager import (
     HashWorkerLimit,
     WorkerManager,
@@ -150,6 +151,25 @@ def __init__(self, architecture, limit):
         )
 
 
+class UserSSHLimit(HashWorkerLimit):
+    """
+    Limit the number of builders that allow user SSH
+    """
+    def __init__(self, _hasher, limit):
+        def predicate(x):
+            # We don't allow user SSH for SRPM builds, return a unique string
+            # making this unlimited
+            if x.source_build:
+                return str(uuid.uuid4())
+
+            # Do not limit builds that don't allow user SSH
+            # pylint: disable=protected-access
+            if not x._task.get("allow_user_ssh"):
+                return str(uuid.uuid4())
+            return x.owner
+        super().__init__(predicate, limit, name="userssh")
+
+
 class BuildTagLimit(PredicateWorkerLimit):
     """
     Limit the amount of concurrently running builds per given build tag.
diff --git a/backend/copr_backend/sshcmd.py b/backend/copr_backend/sshcmd.py
index 14314a6a6..a2683cd16 100644
--- a/backend/copr_backend/sshcmd.py
+++ b/backend/copr_backend/sshcmd.py
@@ -213,7 +213,7 @@ def _full_source_path(self, src):
         return "{}@{}:{}".format(self.user, host, src)
 
     def rsync_download(self, src, dest, logfile=None, max_retries=0,
-                       subprocess_timeout=None):
+                       subprocess_timeout=None, include=None, exclude=None):
         """
         Run rsync over pre-allocated socket (by the config)
@@ -231,9 +231,10 @@ def rsync_download(self, src, dest, logfile=None, max_retries=0,
         directory needs to exist.
         """
         self._retry(self._rsync_download, max_retries, src, dest, logfile,
-                    subprocess_timeout)
+                    subprocess_timeout, include, exclude)
 
-    def _rsync_download(self, src, dest, logfile, subprocess_timeout):
+    def _rsync_download(self, src, dest, logfile, subprocess_timeout,
+                        include, exclude):
         ssh_opts = "ssh"
         if self.config_file:
             ssh_opts += " -F " + self.config_file
@@ -243,8 +244,23 @@ def _rsync_download(self, src, dest, logfile, subprocess_timeout):
         log_filepath = "/dev/null"
         if logfile:
             log_filepath = os.path.join(dest, logfile)
-        command = "/usr/bin/rsync -rltDvH --chmod=D755,F644 -e '{}' {} {}/ &> {}".format(
-            ssh_opts, full_source_path, dest, log_filepath)
+
+        command = [
+            "/usr/bin/rsync",
+            "-rltDvH",
+            "--chmod=D755,F644",
+            "-e", "'{}'".format(ssh_opts),
+        ]
+        for value in include or []:
+            command.extend(["--include", shlex.quote(value)])
+        for value in exclude or []:
+            command.extend(["--exclude", shlex.quote(value)])
+        command.extend([
+            full_source_path,
+            "{}/".format(dest),
+            "&>", log_filepath,
+        ])
+        command = " ".join(command)
 
         self.log.info("rsyncing of %s to %s started", full_source_path, dest)
         with self._popen_timeouted(command, shell=True) as cmd:
diff --git a/backend/tests/test_config_reader.py b/backend/tests/test_config_reader.py
index 2a7c338ed..e1de495df 100644
--- a/backend/tests/test_config_reader.py
+++ b/backend/tests/test_config_reader.py
@@ -38,7 +38,8 @@ def test_minimal_file_and_defaults(self):
         opts = BackendConfigReader(self.get_minimal_config_file()).read()
         assert opts.destdir == "/tmp"
         assert opts.builds_limits == {'arch': {}, 'tag': {}, 'owner': 20,
-                                      'sandbox': 10, 'arch_per_owner': {}}
+                                      'sandbox': 10, 'arch_per_owner': {},
+                                      'userssh': 2}
 
     def test_correct_build_limits(self):
         opts = BackendConfigReader(
@@ -65,6 +66,7 @@ def test_correct_build_limits(self):
                 'ppc64le': 11,
                 's390x': 5,
             },
+            'userssh': 2,
         }
 
     @pytest.mark.parametrize("broken_config", [
diff --git a/backend/tests/testlib/__init__.py b/backend/tests/testlib/__init__.py
index 155a1bb86..88d9b9458 100644
--- a/backend/tests/testlib/__init__.py
+++ b/backend/tests/testlib/__init__.py
@@ -178,7 +178,8 @@ def _full_source_path(self, src):
         return src
 
     def rsync_download(self, src, dest, logfile=None, max_retries=0,
-                       subprocess_timeout=DEFAULT_SUBPROCESS_TIMEOUT):
+                       subprocess_timeout=DEFAULT_SUBPROCESS_TIMEOUT,
+                       include=None, exclude=None):
         data = os.environ["TEST_DATA_DIRECTORY"]
         trail_slash = src.endswith("/")
         src = os.path.join(data, "build_results", self.resultdir)
diff --git a/common/copr_common/helpers.py b/common/copr_common/helpers.py
index 2ee56ca43..a7f803c06 100644
--- a/common/copr_common/helpers.py
+++ b/common/copr_common/helpers.py
@@ -41,3 +41,14 @@ def chroot_to_branch(chroot):
     elif name == "mageia":
         abbrev = "mga"
     return "{}{}".format(abbrev, version)
+
+
+def timedelta_to_dhms(delta):
+    """
+    By default the `datetime.timedelta` provides only days and seconds.
+    Minutes, hours, and a human-friendly number of seconds need to be
+    calculated.
+    """
+    days, remainder = divmod(delta.total_seconds(), 24 * 60 * 60)
+    hours, remainder = divmod(remainder, 60 * 60)
+    minutes, seconds = divmod(remainder, 60)
+    return int(days), int(hours), int(minutes), int(seconds)
diff --git a/common/python-copr-common.spec b/common/python-copr-common.spec
index 59099fa98..32adbbb43 100644
--- a/common/python-copr-common.spec
+++ b/common/python-copr-common.spec
@@ -16,7 +16,7 @@
 %endif
 
 Name: python-copr-common
-Version: 0.21
+Version: 0.21.1.dev
 Release: 1%{?dist}
 Summary: Python code used by Copr
diff --git a/common/setup.py b/common/setup.py
index 34a78a2a4..496053757 100644
--- a/common/setup.py
+++ b/common/setup.py
@@ -20,7 +20,7 @@
 setup(
     name='copr-common',
-    version="0.21",
+    version="0.21.1.dev",
     description=__description__,
     long_description=long_description,
     author=__author__,
diff --git a/frontend/coprs_frontend/alembic/versions/41763f7a5185_add_allow_user_ssh_column.py b/frontend/coprs_frontend/alembic/versions/41763f7a5185_add_allow_user_ssh_column.py
new file mode 100644
index 000000000..19790f8e1
--- /dev/null
+++ b/frontend/coprs_frontend/alembic/versions/41763f7a5185_add_allow_user_ssh_column.py
@@ -0,0 +1,27 @@
+"""
+Add allow_user_ssh column
+
+Revision ID: 41763f7a5185
+Create Date: 2023-11-02 09:30:57.246569
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '41763f7a5185'
+down_revision = 'ec3528516b0c'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.add_column('build', sa.Column('allow_user_ssh', sa.Boolean(),
+                                     server_default='0', nullable=False))
+    op.add_column('build', sa.Column('ssh_public_key', sa.Text()))
+
+
+def downgrade():
+    op.drop_column('build', 'allow_user_ssh')
+    op.drop_column('build', 'ssh_public_key')
diff --git a/frontend/coprs_frontend/coprs/forms.py b/frontend/coprs_frontend/coprs/forms.py
index 81851efed..a98ed9ba5 100644
--- a/frontend/coprs_frontend/coprs/forms.py
+++ b/frontend/coprs_frontend/coprs/forms.py
@@ -1313,6 +1313,27 @@ def selected_chroots(self):
 
     F.packit_forge_project = wtforms.StringField(default=None)
 
+    F.allow_user_ssh = wtforms.BooleanField(
+        "Allow user SSH",
+        default=False,
+        false_values=FALSE_VALUES,
+    )
+    F.ssh_public_key = wtforms.TextAreaField("User public SSH key")
+
+    def validate_ssh_public_key(form, field):
+        if form.allow_user_ssh.data is not True:
+            return
+        if field.data:
+            return
+        raise wtforms.ValidationError("Please specify the Public SSH key")
+
+    # FIXME It is non-trivial to show validation for our resubmit forms because
+    # on failure they redirect to a different page and return 500. It would
+    # require restructuring `coprs_builds.py:_copr_repeat_build` and I don't
+    # want to do that now. Once it is done, enable the validation by
+    # uncommenting the following line:
+    # F.validate_ssh_public_key = validate_ssh_public_key
+
     def _validate_batch_opts(form, field):
         counterpart = form.with_build_id
         modifies = False
diff --git a/frontend/coprs_frontend/coprs/logic/builds_logic.py b/frontend/coprs_frontend/coprs/logic/builds_logic.py
index a051ade7d..7a97ed4c6 100644
--- a/frontend/coprs_frontend/coprs/logic/builds_logic.py
+++ b/frontend/coprs_frontend/coprs/logic/builds_logic.py
@@ -308,8 +308,10 @@ def get_pending_srpm_build_tasks(cls, background=None, data_type=None):
 
         if data_type in ["for_backend", "overview"]:
+            # FIXME The allow_user_ssh field should not be needed here; we
+            # allow SSH only in chroots, but the tests fail without it.
             load_build_fields = ["is_background", "submitted_by", "batch_id",
-                                 "user_id"]
+                                 "user_id", "allow_user_ssh"]
             if data_type == "for_backend":
                 # The custom method allows us to set the chroot for SRPM builds
                 load_build_fields += ["source_type", "source_json"]
@@ -360,7 +362,7 @@ def get_pending_build_tasks(cls, background=None, data_type=None):
         if data_type in ["for_backend", "overview"]:
             query = query.options(
                 load_only("build_id", "tags_raw"),
-                joinedload('build').load_only("id", "is_background", "submitted_by", "batch_id")
+                joinedload('build').load_only("id", "is_background", "submitted_by", "batch_id", "allow_user_ssh")
                 .options(
                     # from copr project info we only need the project name
                     joinedload('copr').load_only("user_id", "group_id", "name")
@@ -710,6 +712,8 @@ def create_new(cls, user, copr, source_type, source_json, chroot_names=None, pkg
             with_build_id=build_options.get("with_build_id"),
             package_chroots_subset=package_chroots_subset,
             packit_forge_project=build_options.get("packit_forge_project"),
+            allow_user_ssh=build_options.get("allow_user_ssh"),
+            ssh_public_key=build_options.get("ssh_public_key"),
         )
 
         if "timeout" in build_options:
@@ -808,7 +812,8 @@ def add(cls, user, pkgs, copr, source_type=None, source_json=None,
             git_hashes=None, skip_import=False, background=False, batch=None,
             srpm_url=None, copr_dirname=None, bootstrap=None, isolation=None,
             package=None, after_build_id=None, with_build_id=None,
-            package_chroots_subset=None, packit_forge_project=None):
+            package_chroots_subset=None, packit_forge_project=None,
+            allow_user_ssh=None, ssh_public_key=None):
 
         coprs_logic.CoprsLogic.raise_if_unfinished_blocking_action(
             copr, "Can't build while there is an operation in progress: {action}")
@@ -862,6 +867,8 @@ def add(cls, user, pkgs, copr, source_type=None, source_json=None,
             copr_dir=copr_dir,
             bootstrap=bootstrap,
             isolation=isolation,
+            allow_user_ssh=allow_user_ssh,
+            ssh_public_key=ssh_public_key,
         )
 
         if timeout:
diff --git a/frontend/coprs_frontend/coprs/models.py b/frontend/coprs_frontend/coprs/models.py
index 231330deb..a9d10880d 100644
--- a/frontend/coprs_frontend/coprs/models.py
+++ b/frontend/coprs_frontend/coprs/models.py
@@ -1090,6 +1090,11 @@ def __init__(self, *args, **kwargs):
     # used by webhook builds; e.g. github.com:praiskup, or pagure.io:jdoe
     submitted_by = db.Column(db.Text)
 
+    # Keep builder alive after the build finishes and allow user SSH access
+    allow_user_ssh = db.Column(db.Boolean, default=False,
+                               server_default="0", nullable=False)
+    ssh_public_key = db.Column(db.Text)
+
     # if a build was resubmitted from another build, this column will contain the original build id
     # the original build id is not here as a foreign key because the original build can be deleted so we can lost
     # the info that the build was resubmitted
@@ -1440,13 +1445,19 @@ def sandbox(self):
         backend later applies builder user-VM separation policy (VMs are
         only re-used for builds which have the same build.sandbox value)
         """
-        submitter, _ = self.submitter
-        if not submitter:
-            # If we don't know build submitter, use "random" value and keep the
-            # build separated from any other.
-            submitter = uuid.uuid4()
-
-        return '{0}--{1}'.format(self.copr.full_name, submitter)
+        # If we don't know build submitter, use "random" value and keep the
+        # build separated from any other.
+        submitter = self.submitter[0] or str(uuid.uuid4())
+        parts = [self.copr.full_name, submitter]
+
+        # If user SSH is allowed, use a random suffix to keep the builder
+        # separated from other users and other builds of the same user
+        if self.allow_user_ssh:
+            # TODO Maybe
+            # is_rpm_build = self.source_status in helpers.FINISHED_STATES
+            # if self.allow_user_ssh and is_rpm_build:
+            parts.append(str(uuid.uuid4()))
+        return "--".join(parts)
 
     @property
     def resubmitted_from(self):
diff --git a/frontend/coprs_frontend/coprs/templates/coprs/detail/_builds_forms.html b/frontend/coprs_frontend/coprs/templates/coprs/detail/_builds_forms.html
index 94ae5a744..472cb65de 100644
--- a/frontend/coprs_frontend/coprs/templates/coprs/detail/_builds_forms.html
+++ b/frontend/coprs_frontend/coprs/templates/coprs/detail/_builds_forms.html
@@ -215,8 +215,21 @@
+            You will obtain SSH access to the builder to easily debug your
+            package within the Copr infrastructure. After connecting, run
+            copr-builder help for complete instructions.
+
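A note on the expiration protocol this patch introduces: the backend's keep-alive loop polls `/run/copr-builder-expiration` on the builder every 60 seconds and shuts the VM down once the stored Unix timestamp passes, never later than `USER_SSH_MAX_EXPIRATION` (two days) after the job started. The sketch below is purely illustrative and is not part of the patch; the real user-facing interface is the `copr-builder` script referenced in the constants, and the helper name here is hypothetical. It only demonstrates the file format that `_builder_expiration()` parses with `float()` and `datetime.fromtimestamp()`.

```python
# Hypothetical sketch (not part of this patch): extend the SSH session from
# inside the builder by rewriting the expiration file polled by the backend.
import time

USER_SSH_EXPIRATION_PATH = "/run/copr-builder-expiration"

def extend_expiration(hours=2):
    """Ask the backend to keep this builder alive for `hours` more hours."""
    new_expiration = time.time() + hours * 3600
    with open(USER_SSH_EXPIRATION_PATH, "w", encoding="utf-8") as fd:
        # The backend reads this file with `cat` and parses it with float(),
        # so a plain Unix timestamp is enough.  The two-day hard cap
        # (USER_SSH_MAX_EXPIRATION, counted from job start) still applies.
        fd.write(str(int(new_expiration)))

if __name__ == "__main__":
    extend_expiration(2)
```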