diff --git a/backend/copr_backend/background_worker_build.py b/backend/copr_backend/background_worker_build.py index 0aa5ff0ef..c8da5be42 100644 --- a/backend/copr_backend/background_worker_build.py +++ b/backend/copr_backend/background_worker_build.py @@ -39,6 +39,11 @@ MIN_BUILDER_VERSION = "0.68.dev" CANCEL_CHECK_PERIOD = 5 +# See also copr-builder script +USER_SSH_DEFAULT_EXPIRATION = 60 * 60 +USER_SSH_MAX_EXPIRATION = 60 * 60 * 24 * 2 +USER_SSH_EXPIRATION_PATH = "/run/copr-builder-expiration" + MESSAGES = { "give_up_repo": "Giving up waiting for copr_base repository, " @@ -52,6 +57,9 @@ COMMANDS = { "rpm_q_builder": "rpm -q copr-rpmbuild --qf \"%{VERSION}\n\"", + "echo_authorized_keys": "echo {0} >> /root/.ssh/authorized_keys", + "set_expiration": "echo -n {0} > " + USER_SSH_EXPIRATION_PATH, + "cat_expiration": "cat {0}".format(USER_SSH_EXPIRATION_PATH), } @@ -741,6 +749,87 @@ def _add_pubkey(self): self.log.info("Added pubkey for user %s project %s into: %s", user, project, pubkey_path) + def _setup_for_user_ssh(self): + """ + Setup the builder for user SSH + https://github.com/fedora-copr/debate/tree/main/user-ssh-builders + + If the builder setup for user SSH becomes more complicated than just + installing the public key, we might want to move the code to a script + within `copr-builder` and call it here or from `copr-rpmbuild`. There + is no requirement for it to be here. + """ + if not self.job.allow_user_ssh: + return + self._deploy_user_ssh() + self._set_default_expiration() + + def _deploy_user_ssh(self): + """ + Deploy user public key to the builder, so that they can connect via SSH. 
+ """ + cmd = COMMANDS["echo_authorized_keys"].format(self.job.ssh_public_key) + rc, out, err = self.ssh.run_expensive(cmd) + if rc != 0: + self.log.error("Failed to deploy user SSH key for %s", + self.job.project_owner) + return + self.log.info("Deployed user SSH key for %s", self.job.project_owner) + + def _set_default_expiration(self): + """ + Set the default expiration time for the builder + """ + default = self.job.started_on + USER_SSH_DEFAULT_EXPIRATION + cmd = COMMANDS["set_expiration"].format(default) + rc, out, err = self.ssh.run_expensive(cmd) + if rc != 0: + # This only affects the `copr-builder show` command to print unknown + # remaining time. It won't affect the backend in terminating the + # builder when it is supposed to + self.log.error("Failed to set the default expiration time") + return + self.log.info("The expiration time was set to %s", default) + + def _builder_expiration(self): + """ + Find the user preference for the builder expiration. + """ + rc, out, err = self.ssh.run_expensive(COMMANDS["cat_expiration"]) + if rc == 0: + try: + return datetime.fromtimestamp(float(out)) + except ValueError: + pass + self.log.error("Unable to query builder expiration file") + + def _keep_alive_for_user_ssh(self): + """ + Wait until user releases the VM or until it expires. + """ + # TODO Should we use CancellableThreadTask for the implementation? + if not self.job.allow_user_ssh: + return + + # We are calculating the limits from when the job started but we may + # want to consider starting the watch when job ends. 
+ default = datetime.fromtimestamp( + self.job.started_on + USER_SSH_DEFAULT_EXPIRATION) + maxlimit = datetime.fromtimestamp( + self.job.started_on + USER_SSH_MAX_EXPIRATION) + + self.log.info("Keeping builder alive for user SSH") + while True: + expiration = self._builder_expiration() or default + if datetime.now() > expiration: + self.log.warning("VM expired, it will be shut-down soon") + break + if datetime.now() > maxlimit: + msg = "VM exceeded max limit, it will be shut-down soon" + self.log.warning(msg) + break + time.sleep(60) + def build(self, attempt): """ Attempt to build. @@ -755,21 +844,7 @@ def build(self, attempt): self._fill_build_info_file() self._cancel_if_requested() self._mark_running(attempt) - - # TODO Move this to a separate method/class - if self.job.allow_user_ssh: - # TODO Move the command to COMMANDS or somewhere else - # TODO Maybe have a script on builder that will do all the steps - # TODO ^^ The script could be called from copr-rpmbuild ... maybe - # no backend change is needed - cmd = ("echo {0} >> /root/.ssh/authorized_keys" - .format(self.job.ssh_public_key)) - rc, out, err = self.ssh.run_expensive(cmd) - if rc != 0: - # TODO Log error - pass - # TODO Log success - + self._setup_for_user_ssh() self._start_remote_build() transfer_failure = CancellableThreadTask( self._transfer_log_file, @@ -783,34 +858,7 @@ def build(self, attempt): raise BuildRetry("SSH problems when downloading live log: {}" .format(transfer_failure)) - if self.job.allow_user_ssh: - # TODO Use CancellableThreadTask - - self.log.info("Keeping builder alive for user SSH") - while True: - started = datetime.fromtimestamp(self.job.started_on) - # expiration = started + timedelta(minutes=30) - expiration = started + timedelta(minutes=5) - - cmd = "cat /run/copr-builder-expiration" - rc, out, err = self.ssh.run_expensive(cmd) - if rc == 0: - try: - expiration = datetime.fromtimestamp(float(out)) - except ValueError: - pass - - if datetime.now() > expiration: - break - - 
# TODO break if uptime of the instance exceeded our max limit - # It can't actually be uptime but rather time after the build - # finishes. Or ... uptime, but we need to mention it in help - # so that kernel packagers know to kill the rpmbuild and run it - # manually ASAP or there won't be enough time. - - time.sleep(60) - + self._keep_alive_for_user_ssh() # TODO Should the results be downloaded for jobs with user SSH? self._download_results() self._drop_host() diff --git a/rpmbuild/bin/copr-builder b/rpmbuild/bin/copr-builder index 91d8ee74e..380de27d8 100755 --- a/rpmbuild/bin/copr-builder +++ b/rpmbuild/bin/copr-builder @@ -14,6 +14,8 @@ from copr_common.helpers import timedelta_to_dhms RPMBUILD_PID_PATH = "/var/lib/copr-rpmbuild/pid" EXPIRATION_PATH = "/run/copr-builder-expiration" + +# See also background_worker_build.py DEFAULT_EXPIRATION = 60 * 60 MAX_EXPIRATION = 60 * 60 * 24 * 2 @@ -86,11 +88,6 @@ class CMDShow: with open("/proc/uptime", "r") as f: return float(f.readline().split()[0]) - @property - def max_uptime_seconds(self): - # uptime_seconds + 48 hours - pass - @property def expiration(self): # TODO We are implementing the same thing on backend, move it to common