From d5cc291fa7ef05ee276ba4a89d6404c2a38d0a3d Mon Sep 17 00:00:00 2001 From: Tim Theisen Date: Fri, 29 Sep 2023 14:05:18 -0500 Subject: [PATCH] Preliminary HTCondor-CE 23 docs --- docs/architecture.md | 6 +- docs/index.md | 4 +- .../configuration/authentication.md | 57 +--- .../configuration/htcondor-routes.md | 0 .../configuration/job-router-overview.md | 8 + .../configuration/local-batch-system.md | 0 .../configuration/non-htcondor-routes.md | 0 .../configuration/optional-configuration.md | 75 ++++++ .../configuration/writing-job-routes.md | 9 + .../installation/central-collector.md | 10 +- docs/{v5 => v23}/installation/htcondor-ce.md | 11 +- docs/{v5 => v23}/operation.md | 13 +- docs/{v5 => v23}/reference.md | 0 docs/v23/releases.md | 54 ++++ docs/{v5 => v23}/remote-job-submission.md | 0 .../troubleshooting/common-issues.md | 28 +- .../troubleshooting/debugging-tools.md | 2 +- docs/{v5 => v23}/troubleshooting/logs.md | 18 -- .../troubleshooting/remote-troubleshooting.md | 5 - docs/v5/releases.md | 255 ------------------ docs/v6/releases.md | 6 - mkdocs.yml | 46 ++-- 22 files changed, 186 insertions(+), 421 deletions(-) rename docs/{v5 => v23}/configuration/authentication.md (66%) rename docs/{v5 => v23}/configuration/htcondor-routes.md (100%) rename docs/{v5 => v23}/configuration/job-router-overview.md (90%) rename docs/{v5 => v23}/configuration/local-batch-system.md (100%) rename docs/{v5 => v23}/configuration/non-htcondor-routes.md (100%) rename docs/{v5 => v23}/configuration/optional-configuration.md (56%) rename docs/{v5 => v23}/configuration/writing-job-routes.md (97%) rename docs/{v5 => v23}/installation/central-collector.md (97%) rename docs/{v5 => v23}/installation/htcondor-ce.md (94%) rename docs/{v5 => v23}/operation.md (86%) rename docs/{v5 => v23}/reference.md (100%) create mode 100644 docs/v23/releases.md rename docs/{v5 => v23}/remote-job-submission.md (100%) rename docs/{v5 => v23}/troubleshooting/common-issues.md (95%) rename docs/{v5 => 
v23}/troubleshooting/debugging-tools.md (99%) rename docs/{v5 => v23}/troubleshooting/logs.md (96%) rename docs/{v5 => v23}/troubleshooting/remote-troubleshooting.md (97%) delete mode 100644 docs/v5/releases.md diff --git a/docs/architecture.md b/docs/architecture.md index 387a04321..28067ff0f 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -54,7 +54,7 @@ owners that want to start contributing to a computing grid with minimal effort. ![HTCondor-CE-Bosco](img/bosco.png) If your site intends to run over 10,000 concurrent pilot jobs, you will need to host your own -[HTCondor-CE](v6/installation/htcondor-ce.md) because the Hosted CE has not yet been optimized for such loads. +[HTCondor-CE](v23/installation/htcondor-ce.md) because the Hosted CE has not yet been optimized for such loads. How the CE is Customized ------------------------ @@ -63,11 +63,11 @@ Aside from the [basic configuration] required in the CE installation, there are you decide any customization is required at all): - **Deciding which Virtual Organizations (VOs) are allowed to run at your site:** HTCondor-CE leverages HTCondor's - built-in ability to [authenticate incoming jobs](v6/configuration/authentication.md) based on their OAuth + built-in ability to [authenticate incoming jobs](v23/configuration/authentication.md) based on their OAuth token credentials. - **How to filter and transform the pilot jobs to be run on your batch system:** Filtering and transforming pilot jobs (i.e., setting site-specific attributes or resource limits), requires configuration of your site’s job routes. - For examples of common job routes, consult the [job router configuration](v6/configuration/job-router-overview.md) + For examples of common job routes, consult the [job router configuration](v23/configuration/job-router-overview.md) pages. 
How Security Works diff --git a/docs/index.md b/docs/index.md index 35a977db8..ad2d89cfa 100644 --- a/docs/index.md +++ b/docs/index.md @@ -40,7 +40,7 @@ Benefits of running the HTCondor-CE: - **Scalability:** HTCondor-CE is capable of supporting ~16k concurrent RARs - **Debugging tools:** HTCondor-CE offers - [many tools to help troubleshoot](v6/troubleshooting/debugging-tools.md) issues with RARs + [many tools to help troubleshoot](v23/troubleshooting/debugging-tools.md) issues with RARs - **Routing as configuration:** HTCondor-CE’s mechanism to transform and submit RARs is customized via configuration variables, which means that customizations will persist across upgrades and will not involve modification of software internals to route jobs @@ -48,7 +48,7 @@ Benefits of running the HTCondor-CE: Getting HTCondor-CE ------------------- -Learn how to get and install HTCondor-CE through our [documentation](v6/installation/htcondor-ce.md). +Learn how to get and install HTCondor-CE through our [documentation](v23/installation/htcondor-ce.md). Contact Us ---------- diff --git a/docs/v5/configuration/authentication.md b/docs/v23/configuration/authentication.md similarity index 66% rename from docs/v5/configuration/authentication.md rename to docs/v23/configuration/authentication.md index 459650733..606d91e8b 100644 --- a/docs/v5/configuration/authentication.md +++ b/docs/v23/configuration/authentication.md @@ -1,13 +1,8 @@ Configuring Authentication ========================== -To authenticate job submission from external users and VOs, HTCondor-CE can be configured to use -[built-in mapfiles](#built-in-mapfiles) or to make [Globus callouts](#globus-callout) to an external service like Argus -or LCMAPS. -The former option is simpler but the latter option may be preferred if your grid supports it or your site already runs -such a service. 
- -Additionally, the HTCondor-CE service uses [X.509 certificates](#configuring-certificates) for SciTokens, SSL, and GSI +To authenticate job submission from external users and VOs, +the HTCondor-CE service uses [X.509 certificates](#configuring-certificates) for SciTokens and SSL authentication. Built-in Mapfiles @@ -44,56 +39,10 @@ in `/etc/condor-ce/mapfiles.d/`: SCITOKENS /^https:\/\/scitokens.org\/osg-connect,.*/ osg ``` -### GSI ### - -To allow clients with GSI proxies with to submit jobs to your HTCondor-CE, add lines of the following format: - -``` -GSI /^$/ -``` - -Replacing `` (escaping any `/` with `\/`) and `` with the distinguished name of the -incoming certificate and the unix account under which the job should run, respectively. -VOMS attributes of incoming X.509 proxy certificates can also be used for mapping: - -``` -GSI /,,,...,/ -``` - -Replacing `` (escaping any `/` with `\/`), `` fields, and `` with the -distinguished name of the incoming certificate, the VOMS roles and groups, and the unix account under which the job -should run, respectively. -For example, to map any certificate from the `GLOW` VO with the `htpc` role to the `glow` user, add the following line -to a `*.conf` file in `/etc/condor-ce/mapfiles.d/`: - -``` -GSI /.*,\/GLOW\/Role=htpc.*/ glow -``` - -Globus Callout --------------- - -To use a Globus callout to a service like LCMAPS or Argus, you will need to have the relevant library installed as well -as the following HTCondor-CE configuration: - -1. Add the following line to the top of `/etc/condor-ce/condor_mapfile`: - - GSI /(.*)/ GSS_ASSIST_GRIDMAP - -1. 
Create `/etc/grid-security/gsi-authz.conf` with the following content: - - - For LCMAPS: - - globus_mapping liblcas_lcmaps_gt4_mapping.so lcmaps_callout - - - For Argus: - - globus_mapping /usr/lib64/libgsi_pep_callout.so argus_pep_callout - Configuring Certificates ------------------------ -HTCondor-CE uses X.509 host certificates and certificate authorities (CAs) when authenticating SciToken, SSL, and GSI +HTCondor-CE uses X.509 host certificates and certificate authorities (CAs) when authenticating SciToken and SSL connections. By default, HTCondor-CE uses the default system locations to locate CAs and host certificate when authenticating SciToken and SSL connections. diff --git a/docs/v5/configuration/htcondor-routes.md b/docs/v23/configuration/htcondor-routes.md similarity index 100% rename from docs/v5/configuration/htcondor-routes.md rename to docs/v23/configuration/htcondor-routes.md diff --git a/docs/v5/configuration/job-router-overview.md b/docs/v23/configuration/job-router-overview.md similarity index 90% rename from docs/v5/configuration/job-router-overview.md rename to docs/v23/configuration/job-router-overview.md index 9015ee864..b87f9a19e 100644 --- a/docs/v5/configuration/job-router-overview.md +++ b/docs/v23/configuration/job-router-overview.md @@ -40,6 +40,14 @@ in the following order: ### Deprecated syntax ### +!!! warning "Planned Removal of Deprecated Syntax" + - `JOB_ROUTER_DEFAULTS`, `JOB_ROUTER_ENTRIES`, `JOB_ROUTER_ENTRIES_CMD`, and `JOB_ROUTER_ENTRIES_FILE` are + deprecated and will be removed for *V24* of the HTCondor Software Suite. New configuration syntax for the job router + is defined using `JOB_ROUTER_ROUTE_NAMES` and `JOB_ROUTER_ROUTE_[name]`. + - For an example of the new syntax, visit: + [HTCondor Documentation - Job Router](https://htcondor.readthedocs.io/en/latest/grid-computing/job-router.html#an-example-configuration) + - **Note:** The removal will occur during the lifetime of the HTCondor *V23* feature series. 
+ Since the inception of HTCondor-CE, job routes have been written as a [list of ClassAds](https://htcondor.readthedocs.io/en/lts/grid-computing/job-router.html#deprecated-router-configuration). Each job route’s [ClassAd](http://research.cs.wisc.edu/htcondor/manual/v8.6/4_1HTCondor_s_ClassAd.html) is constructed diff --git a/docs/v5/configuration/local-batch-system.md b/docs/v23/configuration/local-batch-system.md similarity index 100% rename from docs/v5/configuration/local-batch-system.md rename to docs/v23/configuration/local-batch-system.md diff --git a/docs/v5/configuration/non-htcondor-routes.md b/docs/v23/configuration/non-htcondor-routes.md similarity index 100% rename from docs/v5/configuration/non-htcondor-routes.md rename to docs/v23/configuration/non-htcondor-routes.md diff --git a/docs/v5/configuration/optional-configuration.md b/docs/v23/configuration/optional-configuration.md similarity index 56% rename from docs/v5/configuration/optional-configuration.md rename to docs/v23/configuration/optional-configuration.md index fe1f667ca..6178516bf 100644 --- a/docs/v5/configuration/optional-configuration.md +++ b/docs/v23/configuration/optional-configuration.md @@ -54,6 +54,81 @@ configuring them in unison. START_LOCAL_UNIVERSE = False START_SCHEDULER_UNIVERSE = $(START_LOCAL_UNIVERSE) +Inserting IDTOKENs into the routed job's sandbox +------------------------------------------ + +If you want to insert IDTOKENS into the routed job's sandbox you can use the `SendIDTokens` route command, or +the `JOB_ROUTER_SEND_ROUTE_IDTOKENS` global configuration variable. Tokens +sent using this mechanism must be named and declared using the `JOB_ROUTER_CREATE_IDTOKEN_NAMES` +and [`JOB_ROUTER_CREATE_IDTOKEN_<NAME>`](https://htcondor.readthedocs.io/en/latest/admin-manual/configuration-macros.html#JOB_ROUTER_CREATE_IDTOKEN_%3CNAME%3E) configuration variables. 
Tokens whose names are declared in +the `JOB_ROUTER_SEND_ROUTE_IDTOKENS` configuration variable are sent by default for each route that does +not have a `SendIDTokens` command. + +- **To declare IDTOKENS for inclusion in glide-in jobs** for the purpose of advertising to a collector + add something like the following to `/etc/condor-ce/config.d/99-local-ce-token.conf`: + + JOB_ROUTER_CREATE_IDTOKEN_NAMES = name1 name2 + JOB_ROUTER_CREATE_IDTOKEN_name1 @=end + sub = "name1@users.htcondor.org" + kid = "POOL" + lifetime = 3900 + scope = "ADVERTISE_STARTD, ADVERTISE_MASTER, READ" + dir = "/etc/condor-ce/gltokens/name1" + filename = "ce_name1.idtoken" + owner = "owner1" + @end + JOB_ROUTER_CREATE_IDTOKEN_Name2 @=end + sub = "name2@users.htcondor.org" + kid = "POOL" + lifetime = 3900 + scope = "ADVERTISE_STARTD, ADVERTISE_MASTER, READ" + dir = "/etc/condor-ce/gltokens/name2" + filename = "ce_name2.idtoken" + owner = "owner2" + @end + +- **To insert one of the above IDTOKENS in the sandbox of a routed job**, include the token name in the `SendIDTokens` route + command like this. + + SendIDTokens = "Name2" + !!! note "Route commands" + `SendIDTokens` is a route command, not a job attribute. + This means that you will not be able to manipulate it through + [transform verbs](writing-job-routes.md#editing-attributes) such as `EVALSET`. + **To add an IDTOKEN to a routed job in addition to the default tokens**, build a string containing the token name + along with the value of the global configuration variable like this. + + SendIDTokens = "Name2 $(JOB_ROUTER_SEND_ROUTE_IDTOKENS)" + + **You can use an attribute of the source job** to choose the IDTOKEN by writing an expression like this. + + SendIDTokens = strcat( My.Owner, " $(JOB_ROUTER_SEND_ROUTE_IDTOKENS)") + + It is presumed that the value of `My.Owner` above is the same as the `<name>` of an IDTOKEN and as the `owner` field + of that token. 
For instance, the Fermilab CE config uses the above `SendIDTokens` expression and + the following token declarations at the time of this guide. + + JOB_ROUTER_CREATE_IDTOKEN_NAMES = fermilab3 osg + JOB_ROUTER_CREATE_IDTOKEN_fermilab3 @=end + sub = "fermilabpilot@fnal.gov" + kid = "POOL" + lifetime = 3900 + scope = "ADVERTISE_STARTD, ADVERTISE_MASTER, READ" + dir = "/etc/condor-ce/gltokens/fermilab" + filename = "ce_fermilab3.idtoken" + owner = "fermilab" + @end + JOB_ROUTER_CREATE_IDTOKEN_osg @=end + sub = "osgpilot@fnal.gov" + kid = "POOL" + lifetime = 600 + scope = "ADVERTISE_STARTD, ADVERTISE_MASTER, READ" + dir = "/etc/condor-ce/gltokens/fermilab" + filename = "ce_osg.idtoken" + owner = "osg" + @end + + Enabling the Monitoring Web Interface ------------------------------------- diff --git a/docs/v5/configuration/writing-job-routes.md b/docs/v23/configuration/writing-job-routes.md similarity index 97% rename from docs/v5/configuration/writing-job-routes.md rename to docs/v23/configuration/writing-job-routes.md index acd70c059..0cccd9b69 100644 --- a/docs/v5/configuration/writing-job-routes.md +++ b/docs/v23/configuration/writing-job-routes.md @@ -24,6 +24,15 @@ Each example is displayed in code blocks with tabs to switch between the two syn Syntax Differences ------------------ +!!! warning "Planned Removal of Deprecated Syntax" + - `JOB_ROUTER_DEFAULTS`, `JOB_ROUTER_ENTRIES`, `JOB_ROUTER_ENTRIES_CMD`, and `JOB_ROUTER_ENTRIES_FILE` are + deprecated and will be removed for *V24* of the HTCondor Software Suite. New configuration syntax for the job router + is defined using `JOB_ROUTER_ROUTE_NAMES` and `JOB_ROUTER_ROUTE_[name]`. + - For an example of the new syntax, visit: + [HTCondor Documentation - Job Router](https://htcondor.readthedocs.io/en/latest/grid-computing/job-router.html#an-example-configuration) + - **Note:** The removal will occur during the lifetime of the HTCondor *V23* feature series. 
+ + In HTCondor-CE 5, the [deprecated syntax](job-router-overview.md#deprecated-syntax) continues to be the default and administrator's can move to the [ClassAd transform syntax](job-router-overview.md#classad-transforms) by setting the following in a file in `/etc/condor-ce/config.d/`: diff --git a/docs/v5/installation/central-collector.md b/docs/v23/installation/central-collector.md similarity index 97% rename from docs/v5/installation/central-collector.md rename to docs/v23/installation/central-collector.md index 781a216df..f5bdce9ce 100644 --- a/docs/v5/installation/central-collector.md +++ b/docs/v23/installation/central-collector.md @@ -25,7 +25,7 @@ Before starting the installation process, consider the following points (consulting [the reference page](../reference.md) as necessary): - **User IDs:** If they do not exist already, the installation will create the `condor` Linux user (UID 4716) -- **SSL certificate:** The HTCondor-CE Central Collector service uses a host certificate and key for SSL and GSI +- **SSL certificate:** The HTCondor-CE Central Collector service uses a host certificate and key for SSL authentication - **DNS entries:** Forward and reverse DNS must resolve for the HTCondor-CE Central Collector host - **Network ports:** Site HTCondor-CEs must be able to contact the Central Collector on port 9619 (TCP). @@ -55,11 +55,6 @@ Installing a Central Collector This command will update **all** packages -1. Install the `fetch-crl` package, available from the EPEL repositories. - - :::console - root@host # yum install fetch-crl - 1. Install the Central Collector software: :::console @@ -69,7 +64,7 @@ Configuring a Central Collector ------------------------------- Like a site HTCondor-CE, the Central Collector uses X.509 host certificates and certificate authorities (CAs) when -authenticating SSL and GSI connections. +authenticating SSL connections. 
By default, the Central Collector uses the default system locations to locate CAs and host certificate when authenticating SSL connections, i.e. for SSL authentication methods. But traditionally, the Central Collector and HTCondor-CEs have authenticated with each other using specialized grid @@ -204,7 +199,6 @@ The specific services are: | Software | Service name | |:------------|:--------------------------------------| -| Fetch CRL | `fetch-crl-boot` and `fetch-crl-cron` | | HTCondor-CE | `condor-ce-collector` | Start and enable the services in the order listed and stop them in reverse order. diff --git a/docs/v5/installation/htcondor-ce.md b/docs/v23/installation/htcondor-ce.md similarity index 94% rename from docs/v5/installation/htcondor-ce.md rename to docs/v23/installation/htcondor-ce.md index bd0440bfd..ba2099f0f 100644 --- a/docs/v5/installation/htcondor-ce.md +++ b/docs/v23/installation/htcondor-ce.md @@ -1,5 +1,5 @@ -Installing HTCondor-CE 5 -======================== +Installing HTCondor-CE 23 +========================= !!! tip "Joining the Open Science Grid (OSG)?" 
If you are installing an HTCondor-CE for the OSG, consult the @@ -21,7 +21,7 @@ Before starting the installation process, consider the following points (consulting [the reference page](../reference.md) as necessary): - **User IDs:** If they do not exist already, the installation will create the `condor` Linux user (UID 4716) -- **SSL certificate:** The HTCondor-CE service uses a host certificate and key for SSL and GSI authentication +- **SSL certificate:** The HTCondor-CE service uses a host certificate and key for SSL authentication - **DNS entries:** Forward and reverse DNS must resolve for the HTCondor-CE host - **Network ports:** The pilot factories must be able to contact your HTCondor-CE service on port 9619 (TCP) - **Submit host:** HTCondor-CE should be installed on a host that already has the ability to submit jobs into your @@ -58,11 +58,6 @@ Installing HTCondor-CE This command will update **all** packages -1. Install the `fetch-crl` package, available from the EPEL repositories. - - :::console - root@host # yum install fetch-crl - 1. Select the appropriate convenience RPM: | If your batch system is... | Then use the following package... | diff --git a/docs/v5/operation.md b/docs/v23/operation.md similarity index 86% rename from docs/v5/operation.md rename to docs/v23/operation.md index 8257e98bc..35f8513b1 100644 --- a/docs/v5/operation.md +++ b/docs/v23/operation.md @@ -12,7 +12,6 @@ The specific services are: | Software | Service name | |:-----------------------------|:--------------------------------------------| -| Fetch CRL | `fetch-crl-boot` and `fetch-crl-cron` | | Your batch system | `condor` or `pbs_server` or … | | HTCondor-CE | `condor-ce` | | **(Optional)** APEL uploader | `condor-ce-apel` and `condor-ce-apel.timer` | @@ -62,20 +61,12 @@ before trying to operate the HTCondor-CE again. 
Checking User Authentication ---------------------------- -There are two primary authentication methods for submitting jobs to -an HTCondor-CE: GSI (currently being phased out) and SciTokens. +The authentication method for submitting jobs to +an HTCondor-CE is SciTokens. To see which authentication method and identity were used to submit a particular job (or modify existing jobs), you can look in `/var/log/condor-ce/AuditLog`. -If GSI authentication was used, you'll see a set of lines like this: - -``` -10/15/21 17:52:32 (cid:14) (D_AUDIT) Command=QMGMT_WRITE_CMD, peer=<172.17.0.2:41045> -10/15/21 17:52:32 (cid:14) (D_AUDIT) AuthMethod=GSI, AuthId=/DC=org/DC=opensciencegrid/C=US/O=OSG Software/OU=People/CN=testuser, CondorId=testuser@users.htcondor.org -10/15/21 17:52:32 (cid:14) (D_AUDIT) Submitting new job 1.0 -``` - If SciTokens authentication was used, you'll see a set of lines like this: ``` diff --git a/docs/v5/reference.md b/docs/v23/reference.md similarity index 100% rename from docs/v5/reference.md rename to docs/v23/reference.md diff --git a/docs/v23/releases.md b/docs/v23/releases.md new file mode 100644 index 000000000..8faf09b41 --- /dev/null +++ b/docs/v23/releases.md @@ -0,0 +1,54 @@ +Releases +======== + +HTCondor-CE 23 is distributed via RPM and is available from the following Yum repositories: + +- [HTCondor stable and current channels](https://research.cs.wisc.edu/htcondor/downloads/) +- [Open Science Grid](https://opensciencegrid.org/docs/common/yum/) + + +Known Issues +------------ + +Known bugs affecting HTCondor-CEs can be found in +[Jira](https://opensciencegrid.atlassian.net/issues/?jql=project%20%3D%20HTCONDOR%20AND%20status%20not%20in%20(done%2C%20abandoned)%20and%20component%20%3D%20htcondor-ce%20and%20issuetype%20%3D%20bug) + +Updating to HTCondor-CE 23 +-------------------------- + +!!! 
note "Updating from HTCondor-CE < 6" + If updating to HTCondor-CE 23 from HTCondor-CE < 6, be sure to also consult the HTCondor-CE 6 + [upgrade instructions](../v6/releases.md#500). + +!!! tip "Finding relevant configuration changes" + When updating HTCondor-CE RPMs, `.rpmnew` and `.rpmsave` files may be created containing new defaults that you + should merge or new defaults that have replaced your customizations, respectively. + To find these files for HTCondor-CE, run the following command: + + :::console + root@host # find /etc/condor-ce/ -name '*.rpmnew' -o -name '*.rpmsave' + +HTCondor-CE 23 is very close in functionality to HTCondor-CE 6. +As such, upgrading should be very easy. + +HTCondor-CE 23 Version History +------------------------------ + +This section contains release notes for each version of HTCondor-CE 23. +Full HTCondor-CE version history can be found on [GitHub](https://github.com/htcondor/htcondor-ce/releases). + +### 23.0.0 ### + +[This release](https://github.com/htcondor/htcondor-ce/releases/tag/v23.0.0) includes the following new features: + +- Add grid CA and host certificate/key locations to default SSL search paths +- Verifies that HTCondor-CE can access the local HTCondor's SPOOL directory +- Can use condor\_ce\_trace without SciToken to test batch system integration +- condor\_ce\_upgrade\_check checks compatibility with HTCondor 23.0 +- Adds deprecation warnings for old job router configuration syntax + +Getting Help +------------ + +If you have any questions about the release process or run into issues with an upgrade, please +[contact us](../index.md#contact-us) for assistance. 
diff --git a/docs/v5/remote-job-submission.md b/docs/v23/remote-job-submission.md similarity index 100% rename from docs/v5/remote-job-submission.md rename to docs/v23/remote-job-submission.md diff --git a/docs/v5/troubleshooting/common-issues.md b/docs/v23/troubleshooting/common-issues.md similarity index 95% rename from docs/v5/troubleshooting/common-issues.md rename to docs/v23/troubleshooting/common-issues.md index 6e75ed28e..12e31e136 100644 --- a/docs/v5/troubleshooting/common-issues.md +++ b/docs/v23/troubleshooting/common-issues.md @@ -48,30 +48,11 @@ user@host $ yum reinstall htcondor-ce htcondor-ce-client blahp ### Verify clocks are synchronized -Like all GSI-based authentication, HTCondor-CE is sensitive to time skews. Make sure the clock on your CE is +Like all network-based authentication, HTCondor-CE is sensitive to time skews. Make sure the clock on your CE is synchronized using a utility such as `ntpd`. Additionally, HTCondor itself is sensitive to time skews on the NFS server. If you see empty stdout / err being returned to the submitter, verify there is no NFS server time skew. -### Verify host certificates and CRLs are valid - -An expired host certificate or CRLs will cause various issues with GSI authentication. -Verify that your host certificate is valid by running: - -```console -root@host # openssl x509 -in /etc/grid-security/hostcert.pem -noout -dates -``` - -Likewise, run the `fetch-crl` script to update your CRLs: - -```console -root@host # fetch-crl -``` - -If updating CRLs fix your issues, make sure that the `fetch-crl-cron` and -`fetch-crl-boot` services are enabled and running. - - HTCondor-CE Troubleshooting Items --------------------------------- @@ -284,13 +265,6 @@ If a SciToken can't be mapped and the `D_SECURITY` debug level is enabled, then user's authentication method and identity are present (possibly via a regular expression), and that the mapped OS account exists on your CE and cluster. -1. 
If using GSI, check voms-mapfile or grid-mapfile as an alternate - file with a mapping for the user's identity. -1. If using LCMAPS, check for LCMAPS errors in `/var/log/messages`. -1. If you do not see helpful LCMAPS error messages in `/var/log/messages`, - adjust the debug level by adding `export LCMAPS_DEBUG_LEVEL=5` to - `/etc/sysconfig/condor-ce`, restarting the condor-ce service, and - checking `/var/log/messages` for errors again. ### Jobs stay idle on the CE diff --git a/docs/v5/troubleshooting/debugging-tools.md b/docs/v23/troubleshooting/debugging-tools.md similarity index 99% rename from docs/v5/troubleshooting/debugging-tools.md rename to docs/v23/troubleshooting/debugging-tools.md index bbc922dd2..c0c051b8e 100644 --- a/docs/v5/troubleshooting/debugging-tools.md +++ b/docs/v23/troubleshooting/debugging-tools.md @@ -72,7 +72,7 @@ condor_ce_run ### Usage ### -Similar to `globus-job-run`, `condor_ce_run` is a tool that submits a simple job to your CE, so it is useful for quickly +`condor_ce_run` is a tool that submits a simple job to your CE, so it is useful for quickly submitting jobs through your CE. To submit a job to the CE and run the `env` command on the remote batch system: diff --git a/docs/v5/troubleshooting/logs.md b/docs/v23/troubleshooting/logs.md similarity index 96% rename from docs/v5/troubleshooting/logs.md rename to docs/v23/troubleshooting/logs.md index 0af3c96ee..2e15706e9 100644 --- a/docs/v5/troubleshooting/logs.md +++ b/docs/v23/troubleshooting/logs.md @@ -267,24 +267,6 @@ This log is a good place to check if experiencing connectivity issues with HTCon :::console root@host # condor_ce_reconfig -Messages Log ------------- - -The messages file can include output from lcmaps, which handles mapping of X.509 proxies to Unix usernames. -If there are issues with the [authentication setup](../configuration/authentication.md), the -errors may appear here. 
- -- Location: `/var/log/messages` -- Key contents: User authentication - -### What to look for ### - -A user is mapped: - -``` -Oct 6 10:35:32 osgserv06 htondor-ce-llgt[12147]: Callout to "LCMAPS" returned local user (service condor): "osgglow01" -``` - BLAHP Configuration File ------------------------ diff --git a/docs/v5/troubleshooting/remote-troubleshooting.md b/docs/v23/troubleshooting/remote-troubleshooting.md similarity index 97% rename from docs/v5/troubleshooting/remote-troubleshooting.md rename to docs/v23/troubleshooting/remote-troubleshooting.md index 3675d2504..40b288f6d 100644 --- a/docs/v5/troubleshooting/remote-troubleshooting.md +++ b/docs/v23/troubleshooting/remote-troubleshooting.md @@ -15,11 +15,6 @@ can be contacted on its HTCondor-CE port (default: `9619`) at the specified full ### Verifying DNS ### -!!! tip "Reverse DNS and GSI authentication" - GSI authentication requires that the HTCondor-CE host has a reverse DNS record but that record is not required to - match the forward DNS record! For example, if you have an `A` record `htcondor-ce.chtc.wisc.edu -> 123.4.5.678`, a - `PTR` of `123.4.5.678 -> chtc.wisc.edu` would satisfy the GSI authentication requirement. - As noted in the [HTCondor-CE installation document](../installation/htcondor-ce.md), an HTCondor-CE must have forward and reverse DNS records. 
To verify DNS, use a tool like `nslookup`: diff --git a/docs/v5/releases.md b/docs/v5/releases.md deleted file mode 100644 index 9fa6b5da7..000000000 --- a/docs/v5/releases.md +++ /dev/null @@ -1,255 +0,0 @@ -Releases -======== - -HTCondor-CE 5 is distributed via RPM and are available from the following Yum repositories: - -- [HTCondor stable and current channels](https://research.cs.wisc.edu/htcondor/downloads/) -- [Open Science Grid](https://opensciencegrid.org/docs/common/yum/) - - -Known Issues ------------- - -Known bugs affecting HTCondor-CEs can be found in -[Jira](https://opensciencegrid.atlassian.net/issues/?jql=project%20%3D%20HTCONDOR%20AND%20status%20not%20in%20(done%2C%20abandoned)%20and%20component%20%3D%20htcondor-ce%20and%20issuetype%20%3D%20bug) -In particular, the following bugs are of note: - -- C-style comments, e.g. `/* comment */`, in `JOB_ROUTER_ENTRIES` will prevent the JobRouter from routing jobs - ([HTCONDOR-864](https://opensciencegrid.atlassian.net/browse/HTCONDOR-864)). - For the time being, remove any comments if you are still using the - [deprecated syntax](configuration/job-router-overview.md#deprecated-syntax). - -Updating to HTCondor-CE 5 -------------------------- - -!!! tip "Finding relevant configuration changes" - When updating HTCondor-CE RPMs, `.rpmnew` and `.rpmsave` files may be created containing new defaults that you - should merge or new defaults that have replaced your customzations, respectively. - To find these files for HTCondor-CE, run the following command: - - :::console - root@host # find /etc/condor-ce/ -name '*.rpmnew' -name '*.rpmsave' - -HTCondor-CE 5 is a major release that adds many features and overhauls the default configuration. -As such, upgrades from older versions of HTCondor-CE may require manual intervention: - -### Support for ClassAd transforms added to the JobRouter ### - -!!! 
danger "Transforms will override `JOB_ROUTER_ENTRIES` routes with the same name" - Even if you do not plan on immediately using the new syntax, it's important to note that route transforms will - override `JOB_ROUTER_ENTRIES` routes with the same name. - In other words, the route transform names returned by `condor_ce_config_val -dump -v JOB_ROUTER_ROUTE_` should only - appear in your list of used routes returned by `condor_ce_config_val JOB_ROUTER_ROUTE_NAMES` if you - intend to use the new transform syntax. - -HTCondor-CE now includes default [ClassAd transforms](https://htcondor.readthedocs.io/en/lts/classads/transforms.html) -equivalent to its `JOB_ROUTER_DEFAULTS`, allowing administrators to write job routes using the transform synatx. -The old syntax continues to be the default in HTCondor-CE 5. -Writing routes in the new syntax provides many benefits including: - -- Statements being evaluated in the order they are written -- Use of variables that are not included in the resultant job ad -- Use of simple case statements. - -Additionally, it is now easier to include transforms that should be evaluated before or after your routes by including -transforms in the lists of `JOB_ROUTER_PRE_ROUTE_TRANSFORM_NAMES` and `JOB_ROUTER_PRE_ROUTE_TRANSFORM_NAMES`, -respectively. To use the new transform syntax: - -1. Disable use of `JOB_ROUTER_ENTRIES` by setting the following in `/etc/condor-ce/config.d/`: - - :::console - JOB_ROUTER_USE_DEPRECATED_ROUTER_ENTRIES = False - -1. Set `JOB_ROUTER_ROUTE_` to a job route in the new transform syntax where `` is the name of - the route that you'd like to be reflected in logs and tool output. - -1. 
Add the above `` to the list of routes in `JOB_ROUTER_ROUTE_NAMES` - -### New `condor_mapfile` format and locations ### - -HTCondor-CE 5 separates its -[unified mapfile](https://htcondor.readthedocs.io/en/lts/admin-manual/security.html#the-unified-map-file-for-authentication) -used for authentication between multiple files across multiple directories. -Additionally, any regular expressions in the second field must be enclosed by `/`. -To update your mappings to the new format and location, perform the following actions: - -1. Upon upgrade, your existing mapfile will be moved to `/etc/condor-ce/condor_mapfile.rpmsave`. - Remove any of the following lines provided by default in the HTCondor-CE packaging: - - GSI (.*) GSS_ASSIST_GRIDMAP - SSL "[-.A-Za-z0-9/= ]*/CN=([-.A-Za-z0-9/= ]+)" \1@unmapped.htcondor.org - CLAIMTOBE .* anonymous@claimtobe - FS "^(root|condor)$" \1@daemon.htcondor.org - FS "(.*)" \1 - -1. Copy the remaining contents of `/etc/condor-ce/condor_mapfile.rpmsave` to a file ending in `*.conf` in - `/etc/condor-ce/mapfiles.d/`. - Note that files in this folder are parsed in lexicographic order. - -1. Update the second field of any existing mappings by enclosing any regular expressions in `/`, escaping any slashes - with a backslash (e.g. `\/`). 
- - - Consider converting any `GSI` mappings into Perl Compatible Regular Expressions (PCRE) since the authenticated - name of incoming proxies may contain additional VOMS FQANs in addition to the Distinguished Name (DN): - - ,,,..., - - For example, to accept a given DN with any VOMS attributes, the mapping should look like the following: - - GSI /^\/DC=org\/DC=cilogon\/C=US\/O=University of Wisconsin-Madison\/CN=Brian Lin A226624,.*/ blin - - Alternatively, to accept any DN from the OSG VO: - - GSI /.*,\/osg\/Role=Pilot\/Capability=.*/ osg - - - Also consider converting `SCITOKENS` mappings to PCRE since the authenticated name of incoming tokens will - contain the token issuer (`iss`) and any token subject (`sub`) fields: - - , - - For example, to accept a token issued by the OSG VO with any subject, write the following mapping: - - SCITOKENS /^https:\/\/scitokens.org\/osg-connect,.*/ osg - -### Specify certificate locations for token authentication ### - -HTCondor-CE 5 adds improved support for accepting pilot jobs submitted with bearer tokens -(e.g., SciTokens or WLCG tokens). -As part of the bearer token authentication, HTCondor-CE uses its host certificate to perform an SSL handshake with the -client to establish trust with its token issuer. -Consult the [authentication documentation](configuration/authentication.md#configuring-certificates) to configure -certificate locations for token authentication. - -### No longer set `$HOME` by default ### - -Older versions of HTCondor-CE set `$HOME` in the routed job to the user's `$HOME` directory on the HTCondor-CE. -To re-enable this behavior, set `USE_CE_HOME_DIR = True` in `/etc/condor-ce/config.d/`. - -HTCondor-CE 5 Version History ------------------------------ - -This section contains release notes for each version of HTCondor-CE 5. -Full HTCondor-CE version history can be found on [GitHub](https://github.com/htcondor/htcondor-ce/releases). 
- -### 5.1.6 ### - -[This release](https://github.com/htcondor/htcondor-ce/releases/tag/v5.1.6) includes the following changes: - -- HTCondor-CE now uses the C++ Collector plugin for payload job traceability -- Fix HTCondor-CE mapfiles to be compliant with PCRE2 and HTCondor 9.10.0+ -- Add support for multiple APEL accounting scaling factors -- Suppress spurious log message about a missing negotiator -- Fix crash in HTCondor-CE View - -### 5.1.5 ### - -[This release](https://github.com/htcondor/htcondor-ce/releases/tag/v5.1.5) includes the following changes: - -- Rename AuthToken attributes in the routed job to better support accounting -- Prevent GSI environment from pointing the job to the wrong certificates -- Fix issue where HTCondor-CE would need port 9618 open to start up - -### 5.1.4 ### - -[This release](https://github.com/htcondor/htcondor-ce/releases/tag/v5.1.4) includes the following changes: - -- Fix whole node job glidein CPUs and GPUs expressions that caused held jobs -- Fix bug where default CERequirements were being ignored -- Pass whole node request from GlideinWMS to the batch system -- Since CentOS 8 has reached end of life, we build and test on Rocky Linux 8 - -### 5.1.3 ### - -[This release](https://github.com/htcondor/htcondor-ce/releases/tag/v5.1.3) includes the following changes: - -- The HTCondor-CE central collector requires SSL credentials from client CEs -- Fix BDII crash if an HTCondor Access Point is not available -- Fix formatting of APEL records that contain huge values -- HTCondor-CE client mapfiles are not installed on the central collector - -### 5.1.2 ### - -[This release](https://github.com/htcondor/htcondor-ce/releases/tag/v5.1.2) includes the following changes: - -- Fixed the default memory and CPU requests when using job router transforms -- Apply default MaxJobs and MaxJobsIdle when using job router transforms -- Improved SciTokens support in submission tools -- Fixed --debug flag in condor\_ce\_run -- Update configuration 
verification script to handle job router transforms -- Corrected ownership of the HTCondor PER\_JOBS\_HISTORY\_DIR -- Fix bug passing maximum wall time requests to the local batch system - -### 5.1.1 ### - -[This release](https://github.com/htcondor/htcondor-ce/releases/tag/v5.1.1) includes the following changes: - -- Improve restart time of HTCondor-CE View - ([HTCONDOR-420](https://opensciencegrid.atlassian.net/browse/HTCONDOR-420)) -- Fix bug that caused HTCondor-CE to ignore incoming BatchRuntime requests (#480) -- Fixed error that occurred during RPM installation of non-HTCondor batch systems regarding missing file `batch_gahp` - ([HTCONDOR-504](https://opensciencegrid.atlassian.net/browse/HTCONDOR-504)) - -### 5.1.0 ### - -[This release](https://github.com/htcondor/htcondor-ce/releases/tag/v5.1.0) includes the following new features: - -- Add support for [ClassAd transforms](https://htcondor.readthedocs.io/en/lts/classads/transforms.html) - to the JobRouter ([HTCONDOR-243](https://opensciencegrid.atlassian.net/browse/HTCONDOR-243)) -- Add mapped user and X.509 attribute to local HTCondor pool AccountingGroup mappings to Job Routers configured to use - the ClassAd transform syntax ([HTCONDOR-187](https://opensciencegrid.atlassian.net/browse/HTCONDOR-187)) -- Split `condor_mapfile` into files that use regular expressions in `/etc/condor-ce/mapfiles.d/*.conf` - ([HTCONDOR-244](https://opensciencegrid.atlassian.net/browse/HTCONDOR-244)) -- Accept `BatchRuntime` attributes from incoming jobs to set their maximum walltime - ([HTCONDOR-80](https://opensciencegrid.atlassian.net/browse/HTCONDOR-80)) -- Update the HTCondor-CE registry to Python 3 - ([HTCONDOR-307](https://opensciencegrid.atlassian.net/browse/HTCONDOR-307)) -- Enable SSL authentication by default for `READ`/`WRITE` authorization levels - ([HTCONDOR-366](https://opensciencegrid.atlassian.net/browse/HTCONDOR-366)) -- APEL reporting scripts now use history files in the local HTCondor `PER_JOB_HISTORY_DIR` 
to collect job data. - ([HTCONDOR_293](https://opensciencegrid.atlassian.net/browse/HTCONDOR-293)) -- Use the `GlobalJobID` attribute as the APEL record `lrmsID` - ([#426](https://github.com/htcondor/htcondor-ce/pull/426)) -- Downgrade errors in the configuration verification startup script to support routes written in the transform syntax - ([#465](https://github.com/htcondor/htcondor-ce/pull/465)) -- Allow required directories to be owned by non-`condor` groups - ([#451](https://github.com/htcondor/htcondor-ce/pull/451/files)) - -This release also includes the following bug-fixes: - -- Fix an issue with an overly aggressive default `SYSTEM_PERIODIC_REMOVE` - ([HTCONDOR-350](https://opensciencegrid.atlassian.net/browse/HTCONDOR-350)) -- Fix incorrect path to Python 3 Collector plugin - ([HTCONDOR-400](https://opensciencegrid.atlassian.net/browse/HTCONDOR-400)) -- Fix faulty validation of `JOB_ROUTER_ROUTE_NAMES` and `JOB_ROUTER_ENTRIES` in the startup script - ([HTCONDOR-406](https://opensciencegrid.atlassian.net/browse/HTCONDOR-406)) -- Fix various Python 3 incompatibilities - ([#460](https://github.com/htcondor/htcondor-ce/pull/460)) - -### 5.0.0 ### - -[This release](https://github.com/htcondor/htcondor-ce/releases/tag/v5.0.0) includes the following new features: - -- Python 3 and Enterprise Linux 8 support - ([HTCONDOR_13](https://opensciencegrid.atlassian.net/browse/HTCONDOR-13)) -- HTCondor-CE no longer sets `$HOME` in routed jobs by default - ([HTCONDOR-176](https://opensciencegrid.atlassian.net/browse/HTCONDOR-176)) -- Whole node jobs (local HTCondor batch systems only) now make use of GPUs - ([HTCONDOR-103](https://opensciencegrid.atlassian.net/browse/HTCONDOR-103)) -- HTCondor-CE Central Collectors now prefer GSI over SSL authentication - ([HTCONDOR-237](https://opensciencegrid.atlassian.net/browse/HTCONDOR-237)) -- HTCondor-CE registry now validates the value of submitted client codes - 
([HTCONDOR-241](https://opensciencegrid.atlassian.net/browse/HTCONDOR-241)) -- Automatically remove CE jobs that exceed their `maxWalltime` (if defined) or the configuration value of - `ROUTED_JOB_MAX_TIME` (default: 4320 sec/72 hrs) - -This release also includes the following bug-fixes: - -- Fix a circular configuration definition in the HTCondor-CE View that resulted in 100% CPU usage by the - `condor_gangliad` daemon ([HTCONDOR-161](https://opensciencegrid.atlassian.net/browse/HTCONDOR-161)) - - -Getting Help ------------- - -If you have any questions about the release process or run into issues with an upgrade, please -[contact us](../index.md#contact-us) for assistance. diff --git a/docs/v6/releases.md b/docs/v6/releases.md index 9b2a7b951..497c8297b 100644 --- a/docs/v6/releases.md +++ b/docs/v6/releases.md @@ -12,12 +12,6 @@ Known Issues Known bugs affecting HTCondor-CEs can be found in [Jira](https://opensciencegrid.atlassian.net/issues/?jql=project%20%3D%20HTCONDOR%20AND%20status%20not%20in%20(done%2C%20abandoned)%20and%20component%20%3D%20htcondor-ce%20and%20issuetype%20%3D%20bug) -In particular, the following bugs are of note: - -- C-style comments, e.g. `/* comment */`, in `JOB_ROUTER_ENTRIES` will prevent the JobRouter from routing jobs - ([HTCONDOR-864](https://opensciencegrid.atlassian.net/browse/HTCONDOR-864)). - For the time being, remove any comments if you are still using the - [deprecated syntax](configuration/job-router-overview.md#deprecated-syntax). 
Updating to HTCondor-CE 6 ------------------------- diff --git a/mkdocs.yml b/mkdocs.yml index 3b0031869..a9a67c64b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -19,6 +19,28 @@ nav: - Home: - 'Overview': 'index.md' - 'Architecture': 'architecture.md' +- HTCondor-CE 23: + - Installation: 'v23/installation/htcondor-ce.md' + - Configuration: + - Authentication: 'v23/configuration/authentication.md' + - Local Batch System: 'v23/configuration/local-batch-system.md' + - Job Router: + - Overview: 'v23/configuration/job-router-overview.md' + - Writing Job Routes: 'v23/configuration/writing-job-routes.md' + - For HTCondor Batch Systems: 'v23/configuration/htcondor-routes.md' + - For Non-HTCondor Batch Systems: 'v23/configuration/non-htcondor-routes.md' + - Optional Configuration: 'v23/configuration/optional-configuration.md' + - Operation: 'v23/operation.md' + - Troubleshooting: + - Common Issues: 'v23/troubleshooting/common-issues.md' + - Debugging Tools: 'v23/troubleshooting/debugging-tools.md' + - Helpful Logs: 'v23/troubleshooting/logs.md' + - Central Grid Operations: + - Submit Jobs Remotely: 'v23/remote-job-submission.md' + - Remote Troubleshooting: 'v23/troubleshooting/remote-troubleshooting.md' + - Install a Central Collector: 'v23/installation/central-collector.md' + - Releases: 'v23/releases.md' + - Reference: 'v23/reference.md' - HTCondor-CE 6: - Installation: 'v6/installation/htcondor-ce.md' - Configuration: @@ -41,28 +63,6 @@ nav: - Install a Central Collector: 'v6/installation/central-collector.md' - Releases: 'v6/releases.md' - Reference: 'v6/reference.md' -- HTCondor-CE 5: - - Installation: 'v5/installation/htcondor-ce.md' - - Configuration: - - Authentication: 'v5/configuration/authentication.md' - - Local Batch System: 'v5/configuration/local-batch-system.md' - - Job Router: - - Overview: 'v5/configuration/job-router-overview.md' - - Writing Job Routes: 'v5/configuration/writing-job-routes.md' - - For HTCondor Batch Systems: 
'v5/configuration/htcondor-routes.md' - - For Non-HTCondor Batch Systems: 'v5/configuration/non-htcondor-routes.md' - - Optional Configuration: 'v5/configuration/optional-configuration.md' - - Operation: 'v5/operation.md' - - Troubleshooting: - - Common Issues: 'v5/troubleshooting/common-issues.md' - - Debugging Tools: 'v5/troubleshooting/debugging-tools.md' - - Helpful Logs: 'v5/troubleshooting/logs.md' - - Central Grid Operations: - - Submit Jobs Remotely: 'v5/remote-job-submission.md' - - Remote Troubleshooting: 'v5/troubleshooting/remote-troubleshooting.md' - - Install a Central Collector: 'v5/installation/central-collector.md' - - Releases: 'v5/releases.md' - - Reference: 'v5/reference.md' markdown_extensions: - admonition @@ -77,5 +77,5 @@ plugins: - search - redirects: redirect_maps: - 'v6/verification.md': 'v6/operation.md' + 'v23/verification.md': 'v23/operation.md'