diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..574f7aa --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +public/ +dist/ +pyxecm.egg-info/ +__pycache__/ +.venv/ +debug/ +__pycache__ +build/ +.env +pyxecm/.vscode/ +~$* diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 0000000..2787aa7 --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1,3 @@ +# Changelog + +--8<-- "CHANGELOG.md" \ No newline at end of file diff --git a/docs/payload-syntax.md b/docs/payload-syntax.md index 20c11a8..3c6926c 100644 --- a/docs/payload-syntax.md +++ b/docs/payload-syntax.md @@ -54,7 +54,7 @@ The payload syntax for OTDS customizing uses the following lists (the list eleme #### partitions -`partitions` allows to create new partitions in OTDS. It is also possible to directly put the new partition into an existing `access role`: +`partitions` allows you to create new partitions in OTDS. It is also possible to directly put the new partition into an existing `access role`. The `licenses` this partition should be assigned to can also be specified: === "Terraform / HCL" ```terraform partitions = [ { name = "salesforce" description = "Salesforce user partition" synced = false access_role = "Access to cs" + licenses = ["X2", "ADDON_AVIATOR", "ADDON_MEDIA"] } ] ``` === "YAML" ```yaml partitions: - name: "salesforce" description: "Salesforce user partition" synced: False access_role: "Access to cs" + licenses: + - "X2" + - "ADDON_AVIATOR" + - "ADDON_MEDIA" ``` #### oauthClients -`oauthClients` allows to create new OAuth client in OTDS. Each list element includes a switch `enabled` to turn them on or off. This switch can be controlled by a Terraform variable. `name` defines the name of the OTDS OAuth client and `description` should describe what the OAuth client is used for. Each OAuth client has the typical elements such as `confidential`, OTDS `partition`, a `redirect_url`, `permission_scope`, `default_scope`, and `allow_impersonation`. If there's a predefined secret it can be provided by `secret`. +`oauthClients` allows you to create new OAuth clients in OTDS. Each list element includes a switch `enabled` to turn them on or off. This switch can be controlled by a Terraform variable. `name` defines the name of the OTDS OAuth client and `description` should describe what the OAuth client is used for. Each OAuth client has the typical elements such as `confidential` (default is `true`), OTDS `partition` (default is `Global`), a `redirect_url`, `permission_scope`, `default_scope`, and `allow_impersonation`. If there's a predefined secret it can be provided by `secret`. === "Terraform / HCL" @@ -125,7 +130,7 @@ The payload syntax for OTDS customizing uses the following lists (the list eleme #### authHandlers -`authHandlers` is a list of additional OTDS authentication handlers. The values can also use terraform variables. +`authHandlers` is a list of additional OTDS authentication handlers. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable. In addition, each handler has a `name`, a `type`, and an optional `description`. Further values can be specified that depend on the type of the handler. Supported types are `SAML`, `OAUTH`, or `SAP`. The values can also use Terraform variables.
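+For example, a SAML handler could look like this (a minimal, hypothetical sketch: only `enabled`, `name`, `type`, and `description` are fields confirmed above; the IdP endpoint field shown is a placeholder for the type-specific values):
+
+```terraform
+authHandlers = [
+  {
+    enabled     = true
+    name        = "CorporateSAML"
+    type        = "SAML"
+    description = "SAML authentication against the corporate IdP"
+    # type-specific value (hypothetical placeholder):
+    saml_url    = "https://idp.example.com/saml/metadata"
+  }
+]
+```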
=== "Terraform / HCL" @@ -239,13 +244,14 @@ The payload syntax for OTDS customizing uses the following lists (the list eleme #### systemAttributes -`systemAttributes` allows you to set system attributes in OTDS. Each trusted site has a name, value and an optional description. +`systemAttributes` allows you to set system attributes in OTDS. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable. Each system attribute has a `name`, `value` and an optional `description`. === "Terraform / HCL" ```terraform systemAttributes = [ { + enabled = true name = "otds.as.SameSiteCookieVal" value = "None" description = "SameSite Cookie Attribute" @@ -258,13 +264,14 @@ The payload syntax for OTDS customizing uses the following lists (the list eleme ```yaml systemAttributes: - description: SameSite Cookie Attribute + enabled: true name: otds.as.SameSiteCookieVal value: None ``` #### additionalGroupMemberships -`additionalGroupMemberships` allows to put a pre-existing users or groups into existing OTDS groups. Each element consists of a `parent_group` value combined with either a `group_name` or `user_name` value depending whether you wannt to add a user or group. +`additionalGroupMemberships` allows to put a pre-existing users or groups into existing OTDS groups. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable (or could just be `false` or `true`). In addition, each element consists of a `parent_group` value combined with either a `group_name` or `user_name` value depending whether you wannt to add a user or group. === "Terraform / HCL" @@ -287,7 +294,7 @@ The payload syntax for OTDS customizing uses the following lists (the list eleme #### additionalAccessRoleMemberships -`additionalAccessRoleMemberships` allows to put pre-existing users for groups into existing OTDS Access Roles. Each element consists of a `access_role` value combined with either a `group_name`, `user_name`, or `partition_name` value depending whether you wannt to add a user, group, or a whole OTDS partition to the OTDS Access Role. +`additionalAccessRoleMemberships` allows to put pre-existing users for groups into existing OTDS Access Roles. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable (or could just be `false` or `true`). In addition, each element consists of a `access_role` value combined with either a `group_name`, `user_name`, or `partition_name` value depending whether you wannt to add a user, group, or a whole OTDS partition to the OTDS Access Role. === "Terraform / HCL" @@ -323,20 +330,24 @@ The payload syntax for Extended ECM configurations uses these lists (most elemen #### groups -`groups` is a list of Extended ECM user groups that are automatically created during the deployment. Each group has a name and (optionally) a list of parent groups. The switch `enabled` is used to turn groups on or off. This switch can be controlled by a Terraform variable. `enable_o365` is used to control whether or not a Microsoft 365 group should be created matching the Extended ECM group. The example below shows two groups. The `Finance` group is a child group of the `Innovate` group. The `Finance` group is also created in Microsoft 365 if the variable `var.enable_o365` evaluates to `true`. 
+`groups` is a list of Extended ECM user groups that are automatically created during the deployment. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable. In addition, each group has a `name` and (optionally) a list of parent groups. `enable_o365`, `enable_salesforce`, and `enable_core_share` are used to control whether or not a Microsoft 365, Salesforce, or Core Share group should be created matching the Extended ECM group. The example below shows two groups. The `Finance` group is a child group of the `Innovate` group. The `Finance` group is also created in Microsoft 365 if the variable `var.enable_o365` evaluates to `true`. === "Terraform / HCL" ```terraform groups = [ { - name = "Innovate" - parent_groups = [] + enabled = true + name = "Innovate" + parent_groups = [] }, { - name = "Finance" - parent_groups = ["Innovate"] - enable_o365 = var.enable_o365 + enabled = true + name = "Finance" + parent_groups = ["Innovate"] + enable_o365 = var.enable_o365 + enable_salesforce = var.enable_salesforce + enable_core_share = var.enable_core_share } ] ``` @@ -348,6 +359,8 @@ The payload syntax for Extended ECM configurations uses these lists (most elemen - name: Innovate parent_groups: [] - enable_o365: ${var.enable_o365} + enable_salesforce: ${var.enable_salesforce} + enable_core_share: ${var.enable_core_share} name: Finance parent_groups: - Innovate @@ -355,7 +368,7 @@ The payload syntax for Extended ECM configurations uses these lists (most elemen #### users -`users` is a list of Extended ECM users that are automatically created during deployment. The password of these users is randomly generated and can be printed by `terraform output -json` (all users have the same password). Each user need to have a base group that must be in the `groups` section of the payload. Optionally a user can have a list of additional groups. A user can also have a list of favorites. Favorites can either be the logical name of a workspace instance used in the payload (see workspace below) or it can be a nickname of an Extended item. Users can also have a **security clearance level** and multiple **supplementatal markings**. Both are optional. `security_clearance` is used to define the security clearance level of the user. This needs to match one of the existing security clearnace levels that have been defined in the `securityClearances`section in the payload. `supplemental_markings` defines a list of supplemental markings the user should get. These need to match markings defined in the `supplementalMarkings` section in the payload. The field `privileges` defines the standard privileges of a user. If it is omitted users get the default privileges `["Login", "Public Access"]`. The customizing module is also able to automatically configure Microsoft 365 users for each Extended ECM user. To make this work, the Terraform variable for Office 365 / Microsoft 365 need to be configured. In particular `var.enable_o365` needs to be `true`. In the user settings `enable_o365` has to be set to `true` as well (or you use the variable `var.enable_o365` if the payload is in the `customization.tf` file). `m365_skus` defines a list of Microsoft 365 SKUs that should be assigned to the user. These are the technical SKU IDs that are documented by Microsoft: [Licensing Service Plans](https://learn.microsoft.com/en-us/azure/active-directory/enterprise-users/licensing-service-plan-reference). 
Inside the `customizing.tf` file you also find a convinient map called `m365_skus` that map the SKU ID to readable names (such as "Microsoft 365 E3" or "Microsoft 365 E5"). +`users` is a list of Extended ECM users that are automatically created during deployment. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable. In addition, users should have `name`, `password`, `firstname`, `lastname`, `email`, `title`, and `company`. The `password` of these users can also be randomly generated and can be printed by `terraform output -json` (all users have the same password). Each user needs to have a base group that must be in the `groups` section of the payload. Optionally a user can have a list of additional groups. A user can also have a list of favorites. Favorites can either be the logical name of a workspace instance used in the payload (see workspaces below) or a nickname of an Extended ECM item. Users can also have a **security clearance level** and multiple **supplemental markings**. Both are optional. `security_clearance` is used to define the security clearance level of the user. This needs to match one of the existing security clearance levels that have been defined in the `securityClearances` section in the payload. `supplemental_markings` defines a list of supplemental markings the user should get. These need to match markings defined in the `supplementalMarkings` section in the payload. The field `privileges` defines the standard privileges of a user. If it is omitted, users get the default privileges `["Login", "Public Access"]`. The customizing module is also able to automatically configure Microsoft 365 users for each Extended ECM user. To make this work, the Terraform variables for Office 365 / Microsoft 365 need to be configured. In particular `var.enable_o365` needs to be `true`. In the user settings `enable_o365` has to be set to `true` as well (or you use the variable `var.enable_o365` if the payload is in the `customization.tf` file). `m365_skus` defines a list of Microsoft 365 SKUs that should be assigned to the user. These are the technical SKU IDs that are documented by Microsoft: [Licensing Service Plans](https://learn.microsoft.com/en-us/azure/active-directory/enterprise-users/licensing-service-plan-reference). Inside the `customizing.tf` file you also find a convenient map called `m365_skus` that maps the SKU IDs to readable names (such as "Microsoft 365 E3" or "Microsoft 365 E5"). The `enable_sap`, `enable_successfactors`, `enable_salesforce`, and `enable_core_share` switches allow you to automatically create and configure the users in connected SAP S/4HANA, SuccessFactors, Salesforce, and Core Share applications respectively. 
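+The map might look like this (a hypothetical sketch of what `customizing.tf` could define; the SKU GUIDs shown are taken from Microsoft's licensing service plan reference and should be verified there):
+
+```terraform
+variable "m365_skus" {
+  description = "Map of readable Microsoft 365 license names to technical SKU IDs"
+  type        = map(string)
+  default = {
+    "Microsoft 365 E3" = "05e9a617-0261-4cee-bb44-138d3ef5d965" # SPE_E3
+    "Microsoft 365 E5" = "06ebc4ee-1bb5-47dd-8120-11324bc54e06" # SPE_E5
+  }
+}
+```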
=== "Terraform / HCL" @@ -376,6 +389,10 @@ The payload syntax for Extended ECM configurations uses these lists (most elemen privileges = ["Login", "Public Access", "Content Manager", "Modify Users", "Modify Groups", "User Admin Rights", "Grant Discovery", "System Admin Rights"] enable_o365 = var.enable_o365 m365_skus = [var.m365_skus["Microsoft 365 E3"]] + enable_sap = var.enable_o365 + enable_successfactors = var.enable_o365 + enable_salesforce = var.enable_o365 + enable_core_share = var.enable_o365 extra_attributes = [ { name = "oTExtraAttr0" @@ -395,6 +412,12 @@ The payload syntax for Extended ECM configurations uses these lists (most elemen favorites = ["workspace-b", "nickname-b"] security_clearance = 95 supplemental_markings = ["EU-GDPR-PD", "EUZONE"] + privileges = ["Login", "Public Access"] enable_o365 = var.enable_o365 + m365_skus = [var.m365_skus["Microsoft 365 E5"]] + enable_sap = var.enable_o365 + enable_successfactors = var.enable_o365 + enable_salesforce = var.enable_o365 + enable_core_share = var.enable_o365 } ] ``` @@ -435,6 +458,11 @@ The payload syntax for Extended ECM configurations uses these lists (most elemen title: Administrator - base_group: Sales email: nwheeler@innovate.com + enable_o365: ${var.enable_o365} + enable_sap: ${var.enable_sap} + enable_successfactors: ${var.enable_successfactors} + enable_salesforce: ${var.enable_salesforce} + enable_core_share: ${var.enable_core_share} favorites: - workspace-b - nickname-b @@ -445,6 +473,9 @@ The payload syntax for Extended ECM configurations uses these lists (most elemen lastname: Wheeler name: nwheeler password: ${local.password} + privileges: + - Login + - Public Access security_clearance: 95 supplemental_markings: - EU-GDPR-PD @@ -454,13 +485,14 @@ The payload syntax for Extended ECM configurations uses these lists (most elemen #### items -`items` and `itemsPost` are lists of Extended ECM items such as folders, shortcuts or URLs that should be created automatically but are not included in transports. All items are created in the `Enterprise Workspace` of Extended ECM or any subfolder. Each item needs to have `name` and `type` values. The parent ID of the item can either be specified by a nick name (`parent_nickname`) or by the path in the Enterprise Workspace (`parent_path`). The value `parent_path` is a list of folder names starting from the root level in the Enterprise Workspaces. `parent_path = ["Administration", "WebReports"]` creates the item in the `Websites` folder which is itself in the `Administration` top-level folder. The list `items` is processed at the beginning of the automation (before transports are applied) and `itemsPost` is applied at the end of the automation (after transports have been applied). +`items` and `itemsPost` are lists of Extended ECM items such as folders, shortcuts or URLs that should be created automatically but are not included in transports. All items are created in the `Enterprise Workspace` of Extended ECM or any subfolder. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable. In addition, each item needs to have `name` and `type` values. The parent ID of the item can either be specified by a nick name (`parent_nickname`) or by the path in the Enterprise Workspace (`parent_path`). The value `parent_path` is a list of folder names starting from the root level in the Enterprise Workspaces. 
`parent_path = ["Administration", "WebReports"]` creates the item in the `WebReports` folder which is itself in the `Administration` top-level folder. The list `items` is processed at the beginning of the automation (before transports are applied) and `itemsPost` is applied at the end of the automation (after transports have been applied). === "Terraform / HCL" ```terraform items = [ { + enabled = true parent_nickname = "" # empty string = not set parent_path = ["Administration", "WebReports"] name = "Case Management" @@ -494,6 +526,7 @@ The payload syntax for Extended ECM configurations uses these lists (most elemen ```yaml items: - description: Case Management with eFiles and eCases + enabled: true name: Case Management original_nickname: 0 original_path: [] @@ -505,6 +538,7 @@ The payload syntax for Extended ECM configurations uses these lists (most elemen url: '' itemsPost: - description: The OpenText web site + enabled: true name: OpenText Homepage original_nickname: 0 original_path: [] @@ -519,13 +553,14 @@ The payload syntax for Extended ECM configurations uses these lists (most elemen #### permissions -`permissions` and `permissionsPost` are both lists of Exteneded ECM items, each with a specific permission set that should be applied to the item. The item can be specified via a path (list of folder names in Enterprise workspace in top-down order), via a nickname, or via a volume. Permission values are listed as list strings in `[...]` for `owner_permissions`, `owner_group_permissions`, or `public_permissions`. They can be a combination of the following values: `see`, `see_contents`, `modify`, `edit_attributes`, `add_items`, `reserve`, `add_major_version`, `delete_versions`, `delete`, and `edit_permissions`. The `apply_to` specifies if the permissions should only be applied to the item itself (value 0) or only to sub-items (value 1) or the item _and_ its sub-items (value 2). The list specified by `permissions` is applied _before_ the transport packages are applied and `permissionsPost` is applied _after_ the transport packages have been processed. +`permissions` and `permissionsPost` are both lists of Extended ECM items, each with a specific permission set that should be applied to the item. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable. In addition, the item can be specified via a path (list of folder names in the Enterprise Workspace in top-down order), via a nickname, or via a volume. Permission values are listed as lists of strings in `[...]` for `owner_permissions`, `owner_group_permissions`, or `public_permissions`. They can be a combination of the following values: `see`, `see_contents`, `modify`, `edit_attributes`, `add_items`, `reserve`, `add_major_version`, `delete_versions`, `delete`, and `edit_permissions`. The `apply_to` specifies if the permissions should only be applied to the item itself (value 0), only to sub-items (value 1), or to the item _and_ its sub-items (value 2). The list specified by `permissions` is applied _before_ the transport packages are applied and `permissionsPost` is applied _after_ the transport packages have been processed. === "Terraform / HCL" ```terraform permissions = [ { + enabled = true path = ["...", "..."] volume = "..." # identified by volume type ID nickname = "..." 
# an item with this nick name needs to exist @@ -554,6 +589,7 @@ The payload syntax for Extended ECM configurations uses these lists (most elemen ```yaml permissions: - apply_to: 2 + enabled: true groups: - name: '...' permissions: [] @@ -574,18 +610,20 @@ The payload syntax for Extended ECM configurations uses these lists (most elemen #### renamings -`renamings` is a list of Extended ECM items (e.g. volume names) that are automatically renamed during deployment. You have to either provide the `nodeid` (only a few node IDs are really know upfront such as 2000 for the Enterprise Workspace) or a `volume` (type ID). In case of volumes there's a list of known volume types defined at the beginning of the `customizing.tf` file with the variable `otcs_volumes`. You can also specific a description that will be used to update the description of the node / item. +`renamings` is a list of Extended ECM items (e.g. volume names) that are automatically renamed during deployment. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable. In addition, you have to either provide the `nodeid` (only a few node IDs are really known upfront such as 2000 for the Enterprise Workspace) or a `volume` (type ID). In case of volumes there's a list of known volume types defined at the beginning of the `customizing.tf` file with the variable `otcs_volumes`. You can also specify a description that will be used to update the description of the node / item. === "Terraform / HCL" ```terraform renamings = [ { + enabled = true nodeid = 2000 name = "Innovate" description = "Innovate's Enterprise Workspace" }, { + enabled = true volume = var.otcs_volumes["Content Server Document Templates"] name = "Content Server Document Templates" description = "Extended ECM Workspace and Document Templates" @@ -598,17 +636,18 @@ The payload syntax for Extended ECM configurations uses these lists (most elemen ```yaml renamings: - description: Innovate's Enterprise Workspace + enabled: true name: Innovate nodeid: 2000 - description: Extended ECM Workspace and Document Templates + enabled: true name: Content Server Document Templates volume: ${var.otcs_volumes["Content Server Document Templates"]} ``` #### adminSettings -`adminSettings` and `adminSettingsPost` are lists admin stettings that are applied before the transport packages (`adminSettings`) or directly after the transport packages (`adminSettingsPost`) in the customizing process. Each setting is defined by a `description`, the `filename` of an XML file that includes the actual Extended ECM admin settings that are applied automatically (using XML import / LLConfig). These files need to be stored inside the `setting/payload` sub-folder inside the terraform folder. -Each admin setting may have a field called `enabled` that allows to dyanmically turn on / off admin settings based on a boolean value that may be read from a Terraform variable (or could just be `False` or `True`). +`adminSettings` and `adminSettingsPost` are lists of admin settings that are applied before the transport packages (`adminSettings`) or directly after the transport packages (`adminSettingsPost`) in the customizing process. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable (or could just be `false` or `true`). 
In addition, each setting is defined by a `description`, the `filename` of an XML file that includes the actual Extended ECM admin settings that are applied automatically (using XML import / LLConfig). These files need to be stored inside the `setting/payload` sub-folder inside the terraform folder. === "Terraform / HCL" @@ -651,8 +690,8 @@ Each admin setting may have a field called `enabled` that allows to dyanmically #### externalSystems -`externalSystems` is a list of connections to external business applications such as SAP S/4HANA, Salesforce, or SuccessFactors. Some of the fields are common, some are specific for the type of the external system. -Each external system has a field called `enabled` that allows to dyanmically turn on / off external system configurations based on a boolean value that may be read from a Terraform variable (or could just be `False` or `True`). The field `external_system_type` needs to have one of these values: `SAP`, `Salesforce`, `SuccessFactors`, or `AppWorks Platform`. +`externalSystems` is a list of connections to external business applications such as SAP S/4HANA, Salesforce, or SuccessFactors. Some of the payload elements are common, some are specific to the type of the external system. +Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable (or could just be `false` or `true`). In addition, the field `external_system_type` needs to have one of these values: `SAP`, `Salesforce`, `SuccessFactors`, `AppWorks Platform`, or `Business Scenario Sample`. All other fields depend on the selected `external_system_type` value. === "Terraform / HCL" @@ -752,7 +791,7 @@ Each external system has a field called `enabled` that allows to dyanmically tur #### transportPackages -`transportPackages` is a list of transport packages that should be applied automatically. These packages need to be accessible via the provided URLs. The `name` must be the exact file name of the ZIP package. Description is optional. +`transportPackages` is a list of transport packages that should be applied automatically. These packages need to be accessible via the provided URLs. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable (or could just be `false` or `true`). In addition, the `name` must be the exact file name of the ZIP package. A value for `description` is optional. === "Terraform / HCL" @@ -847,7 +886,7 @@ Each external system has a field called `enabled` that allows to dyanmically tur #### workspaces -`workspaces` is a list of business workspaces instances that should be automatically created. Category, Roles, and Business Relationships can be provided. The `id` needs to be a unique value in the payload. It does not need to be something related to any of the actual Extended ECM workspace data. It is only used to establish relationship between different workspaces in the payload (using the list of IDs in `relationships`). **_Important_**: If the workspace type definition uses a pattern to generate the workspace name then the `name` in the payload should match the pattern in the workspace definition. Otherwise incremental deployments of the payload may not find the existing workspaces and may try to recreate them resulting in an error. The `nickname` is the Extended ECM nickname that allows to refer to this itemwithout knowing its technical ID. 
+`workspaces` is a list of business workspace instances that should be automatically created. Category, Roles, and Business Relationships can be provided. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable (or could just be `false` or `true`). In addition, the `id` needs to be a unique value in the payload. It does not need to be related to any of the actual Extended ECM workspace data. It is only used to establish relationships between different workspaces in the payload (using the list of IDs in `relationships`). **_Important_**: If the workspace type definition uses a pattern to generate the workspace name then the `name` in the payload should match the pattern in the workspace type definition. Otherwise incremental deployments of the payload may not find the existing workspaces and may try to recreate them resulting in an error. The `nickname` is the Extended ECM nickname that allows you to refer to this item without knowing its technical ID. Business Object information can be provided with a `business_objects` list. Each list item defines the external system (see above), the business object type, and business object ID. This list is optional. @@ -857,7 +896,7 @@ Classification information is optional and can be provided separately for Record Category information is provided in a list of blocks. Each block includes the category `name`, `set` name (optional, can be empty of the attribute is not in a set), `attribute` name, and the attribute `value`. Multi-value attributes are a comma-separated list of items in square brackets. The example below shows a customer workspace and a contract workspace that are related to each other (the customer workspace has an attribute `Sales Organization` that has multiple values: 1000 and 2000). The contract workspace has a multi-line attribute set. For multi-line attribute sets the payload needs an additional `row` value that specifies the row number in the multi-line set (starting with 1 for the first row). -A thrid workspace in the example below is for `Material` - it has an additional field called `template_name` which is optional. It can be used if there are multiple templates for one workspace type. If it is not specified and the workspace type has multiple workspace templates the first template is automatically selected. +A third workspace in the example below is for `Material` - it has an additional field called `template_name` which is optional. It can be used if there are multiple templates for one workspace type. If it is not specified and the workspace type has multiple workspace templates the first template is automatically selected. === "Terraform / HCL" @@ -1070,7 +1109,7 @@ A thrid workspace in the example below is for `Material` - it has an additional #### webReports -`webReports` and `webReportsPost` are two lists of Extended ECM web reports that should be automatically executed during deployment. Having two lists give you the option to run some webReports after the business configuration and some others after demo content has been produced. These Web Reports have typically been deployd to Extended ECM system with the transport warehouse before. Each list item specifies one Web Report. The `nickname` is mandatory and defines the nickname of the Web Report to be executed. So you need to give each webReport you want to run a nickname before putting it in a transport package. The element `description` is optional. 
The `parameters` set defines parameter name and parameter value pairs. The corresponding Web Report in Extended ECM must have exactly these parameters defined. +`webReports` and `webReportsPost` are two lists of Extended ECM web reports that should be automatically executed during deployment. Having two lists gives you the option to run some webReports after the business configuration and some others after demo content has been produced. These Web Reports have typically been deployed to the Extended ECM system with the transport warehouse before. Each list item specifies one Web Report. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable (or could just be `false` or `true`). In addition, the `nickname` is mandatory and defines the nickname of the Web Report to be executed. So you need to give each webReport you want to run a nickname before putting it in a transport package. The element `description` is optional. The `parameters` set defines parameter name and parameter value pairs. The corresponding Web Report in Extended ECM must have exactly these parameters defined. === "Terraform / HCL" @@ -1116,7 +1155,7 @@ A thrid workspace in the example below is for `Material` - it has an additional #### csApplications -`csApplications` is a list of Content Server Applications that should autmatically be deployed. Each element has a `name` for the application and optionally a `description`. +`csApplications` is a list of Content Server Applications that should automatically be deployed. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable (or could just be `false` or `true`). In addition, each element has a `name` for the application and optionally a `description`. === "Terraform / HCL" @@ -1151,7 +1190,7 @@ A thrid workspace in the example below is for `Material` - it has an additional #### assignments -`assignments` is a list of assignments. Assignments are typically used for _Extended ECM for Government_. Each assignment assigns either a `workspace` or an Extended ECM item with a `nickname` to a defined list of `users` or `groups`. Assignments have a `subject` (title) and `instructions` for the target users or groups. +`assignments` is a list of assignments. Assignments are typically used for _Extended ECM for Government_. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable (or could just be `false` or `true`). Each assignment assigns either a `workspace` or an Extended ECM item with a `nickname` to a defined list of `users` or `groups`. Assignments have a `subject` (title) and `instructions` for the target users or groups. === "Terraform / HCL" @@ -1184,29 +1223,280 @@ A thrid workspace in the example below is for `Material` - it has an additional ``` -#### workspaceTemplateRegistrations - -`workspaceTemplateRegistrations` is used to register certain workspace templates for the use as projects in _Extended ECM for Engineering_ demo scenarios. Each registration has two mandatory fields. `workspace_type_name` defines the name of the workspace type and `workspace_template_name` defines the specific name of the workspace templates (as each workspace type may have multiple templates). 
+### Bulk Load Customizing Syntax + +For mass loading and generation of workspaces and documents from external data sources, the customizing allows you to specify bulk data sources, bulk workspaces, bulk workspace relationships, and bulk documents. The data sources will be loaded into an internal table representation (we use Pandas data frames for this). + +#### Bulk Data Sources + +Before you can bulk load workspaces, workspace relationships, or documents you have to declare the used data sources. `bulkDatasources` is a list of data sources. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable (or could just be `false` or `true`). First, a data source needs a `type`. Supported types are `excel` (Microsoft Excel workbooks), `servicenow` (ServiceNow REST API), `otmm` (OpenText Media Management REST API), `otcs` (Extended ECM REST API), `pht` (internal OpenText System for Product Master Data, REST API), `json` (JSON files), and `xml` (XML files, or whole directories / zip files of XML files). Based on the selected `type`, data sources have additional type-specific fields that configure the specifics of the data source and define how to connect to it. + +In general, there are a couple of settings that can be applied to all data sources: + +- `cleansings` (dictionary, optional, default = {}) to clean the values in defined columns of the data set. The keys are column names; each value is a dictionary with these keys: + - `upper` (bool, optional, default = False) + - `lower` (bool, optional, default = False) + - `length` (int, optional, default = None) + - `replacements` (dict, optional, default = {}) - the keys are regular expressions and the values are replacement values +- `columns_to_drop` (list, optional, default = []) to remove columns from the data set (black list those to delete) +- `columns_to_keep` (list, optional, default = []) to keep columns in the data set (white list those to keep) +- `columns_to_add` (list, optional, default = []) - each list item is a dictionary with these keys: + - `source_column` (str, mandatory) - name of the column the base value for the new column is taken from + - `name` (str, mandatory) - name of the new column + - `reg_exp` (str, optional, default = None) + - `prefix` (str, optional, default = "") - prefix to add to the new column value + - `suffix` (str, optional, default = "") - suffix to add to the new column value + - `length` (int, optional, default = None) + - `group_chars` (str, optional, default = None) + - `group_separator` (str, optional, default = ".") +- `conditions` (list, optional, default = []) - each list item is a dict with these keys: + - `field` (str, mandatory) + - `value` (str | bool | list, optional, default = None) +- `explosions` (list, optional, default = []) - each list item is a dict with these keys: + - `explode_field` (str | list, mandatory) + - `flatten_fields` (list, optional, default = []) + - `split_string_to_list` (bool, optional, default = False) +- `name_column` (str, optional, default = None) - name of the column in the data source that determines the bulk item name +- `synonyms_column` (str, optional, default = None) + +OpenText Extended ECM / Content Server specific settings (fields): + +- `otcs_hostname` (str, mandatory) +- `otcs_protocol` (str, optional, default = "https") +- `otcs_port` (str, optional, default = "443") +- `otcs_basepath` (str, optional, default = "/cs/cs") +- `otcs_username` (str, mandatory) +- `otcs_password` (str, 
mandatory) +- `otcs_thread_number` (int, optional, default = BULK_THREAD_NUMBER) +- `otcs_download_dir` (str, optional, default = "/data/contentserver") +- `otcs_root_node_id` (int, mandatory) +- `otcs_filter_workspace_depth` (int, optional, default = 0) - 0 = workspaces are located immediately below the given root node +- `otcs_filter_workspace_subtypes` (list, optional, default = []) - 0 = folder subtype +- `otcs_filter_workspace_category` (str, optional, default = None) +- `otcs_filter_workspace_attributes` (dict | list, optional, default = None) + +ServiceNow specific settings (fields): + +- `sn_base_url` (str, mandatory) +- `sn_auth_type` (str, optional, default = "basic") +- `sn_username` (str, optional, default = "") +- `sn_password` (str, optional, default = "") +- `sn_client_id` (str, optional, default = None) +- `sn_client_secret` (str, optional, default = None) +- `sn_table_name` (str, optional, default = "u_kb_template_technical_article_public") +- `sn_query` (str, optional, default = None) +- `sn_thread_number` (int, optional, default = BULK_THREAD_NUMBER) +- `sn_download_dir` (str, optional, default = "/data/knowledgebase") + +OpenText Media Management specific settings (fields): + +- `otmm_username` (str, optional, default = "") +- `otmm_password` (str, optional, default = "") +- `otmm_client_id` (str, optional, default = None) +- `otmm_client_secret` (str, optional, default = None) +- `otmm_thread_number` (int, optional, default = BULK_THREAD_NUMBER) +- `otmm_download_dir` (str, optional, default = "/data/mediaassets") +- `otmm_business_unit_exclusions` (list, optional, default = []) +- `otmm_product_exclusions` (list, optional, default = []) === "Terraform / HCL" ```terraform - workspaceTemplateRegistrations = [ - { - workspace_type_name = "SAP PPM Project" - workspace_template_name = "Project" + bulkDatasources = [ + { + enabled = true + name = "ntsb" + description = "NTSB Data Source from https://www.ntsb.gov" + type = "json" + json_files = ["/datasources/ntsb-2024-01.json", "/datasources/ntsb-2024-02.json", "/datasources/ntsb-1962-2023.json"] + + # columns to keep. If empty we keep all columns + columns_to_keep = [ + "cm_mkey", + "cm_ntsbNum", + "...", + ] + # columns to drop. If empty we drop no columns + columns_to_drop = [] + explosions = [ + { + explode_field = "cm_vehicles" + flatten_fields = ["make", "model", "operatorName"] + } + ] + conditions = [ + { + "field": "cm_vehicles_operatorName", + "value": [ + "AIR CANADA", + "AIR CHINA", + "..." + ], + "regex": false, + }, + ] + + cleansings = { + "airportName": { + "upper": true + "replacements" : { + "-": " ", # replace hyphen with space + "/": " ", # replace slash with space + " AIRPORT$": "", # remove " AIRPORT" at the end of names + " AIRPOR$": "", # remove " AIRPOR" at the end of names + " ARPT$": "", # remove " ARPT" at the end of names + " AIRP$": "", # remove " AIRP" at the end of names + " A$": "", # remove " A" at the end of names (abbreviation for Airport) + } + } } - ] + } + ] ``` -=== "YAML" +#### Bulk Workspaces + +To bulk load workspaces, you can define a payload section `bulkWorkspaces`, which can produce a large number of workspaces based on placeholders that are filled with data from a defined data source. Each list element can include a switch called `enabled` to turn them on or off (the default is `true`). This switch can be controlled by a Terraform variable (or could just be `false` or `true`). 
Each bulk workspace element needs a `data_source` that specifies the name of a data source from the `bulkDatasources` payload section. + +These are the settings in a single `bulkWorkspaces` list element: + +- `enabled` (bool, optional, default = True) +- `type_name` (str, mandatory) - type of the workspace +- `data_source` (str, mandatory) +- `force_reload` (bool, optional, default = True) +- `enforce_updates` (bool, optional, default = False) +- `unique` (list, optional, default = []) - list of fields (columns) that should be unique -> deduplication +- `sort` (list, optional, default = []) - list of fields to sort the data frame by +- `name` (str, mandatory) +- `description` (str, optional, default = "") +- `template_name` (str, optional, default = take first template) +- `categories` (list, optional, default = []) - each list item is a dictionary that may have these keys: + - `name` (str, mandatory) + - `set` (str, default = "") + - `row` (int, optional) + - `attribute` (str, mandatory) + - `value` (str, optional if value_field is specified, default = None) + - `value_field` (str, optional if value is specified, default = None) - can include placeholders surrounded by {...} + - `value_type` (str, optional, default = "string") - values can be `string` or `list`; if `list`, a string with comma-separated values will be converted to a list + - `list_splitter` (str, optional, default = ";,") + - `lookup_data_source` (str, optional, default = None) + - `is_key` (bool, optional, default = False) - find workspace with old name. For this we expect a "key" value to be defined in the bulk workspace and one of the category / attribute items to be marked with "is_key" = True +- `external_create_date` (str, optional, default = "") +- `external_modify_date` (str, optional, default = "") +- `key` (str, optional, default = None) - lookup key for workspaces other than the name +- `replacements` (dict, optional, default = {}) - Each dictionary item has the field name as the dictionary key and a list of regular expressions as dictionary value +- `nickname` (str, optional, default = None) +- `conditions` (list, optional, default = []) - each list item is a dictionary that may have these keys: + - `field` (str, mandatory) + - `value` (str | bool | list, optional, default = None) - ```yaml - workspaceTemplateRegistrations: - - workspace_template_name: Project - workspace_type_name: SAP PPM Project +=== "Terraform / HCL" + + ```terraform + bulkWorkspaces = [ + { + data_source = "ntsb" + name = "{airportName} ({airportId})" + nickname = "ws_location_{airportName}_{airportId}" + description = "" + type_name = "Location" + template_name = "Location" + conditions = [ + { + field = "{airportName}" + }, + { + field = "{airportId}" + } + ] + unique = ["airportName", "airportId"] + sort = ["airportName"] # sorting may help to avoid name clashes between threads + replacements = {} # no "local" replacements + }, + { + data_source = "ntsb" + name = "{cm_vehicles.make}" + nickname = "ws_manufacturer_{cm_vehicles.make}" + description = "" + type_name = "Manufacturer" + template_name = "Manufacturer" + conditions = [ + { + field = "{cm_mode}" + value = "Aviation" + }, + { + field = "{cm_vehicles.make}" + } + ] + unique = ["cm_vehicles_make"] + sort = ["cm_vehicles_make"] # sorting may help to avoid name clashes between threads + replacements = {} # no "local" replacements + }, + { + data_source = "ntsb" + name = "{cm_vehicles.operatorName}" + nickname = "ws_operator_{cm_vehicles.operatorName}" + description = "" + type_name = "Operator" + 
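+        # template_name is optional: if it is omitted and the workspace type
+        # has multiple templates, the first template is selected (see settings above)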
template_name = "Operator" + conditions = [ + { + field = "{cm_vehicles.operatorName}" + } + ] + unique = ["cm_vehicles_operatorName"] # we must have an underscore here as this is a generated top-level field + sort = ["cm_vehicles_operatorName"] # sorting may help with avoiding name clashes between threads + replacements = {} # no "local" replacements + }, + { + data_source = "ntsb" + name = "{cm_ntsbNum}" + nickname = "ws_incident_{cm_ntsbNum}" + description = "" + type_name = "Incident" + template_name = "Incident" + unique = ["cm_ntsbNum"] # the explosion may generate multiple lines for one NTSB number + replacements = {} # no "local" replacements + categories = [ + { + name = "Incident" + set = "" + attribute = "Key" + value_field = "{cm_mkey}" + }, + { + name = "Incident" + set = "" + attribute = "Status" + value_field = "{cm_completionStatus}" + }, + { + name = "Incident" + set = "" + attribute = "Has Safety Recommendation" + value_field = "{cm_hasSafetyRec}" + }, + { + name = "Incident" + set = "" + attribute = "Highest Injury Level" + value_field = "{cm_highestInjury}" + }, + ... + ] + }, + ] ``` +#### Bulk Workspace Relationships + +To be completed... + +#### Bulk Documents + +To be completed... + ### Advanced Customizing Syntax For advanced use cases that are not covered by Extended ECM or OTDS APIs, there are additional customizing capabilities. diff --git a/docs/pyxecm/coreshare.md b/docs/pyxecm/coreshare.md new file mode 100644 index 0000000..afc4f0a --- /dev/null +++ b/docs/pyxecm/coreshare.md @@ -0,0 +1 @@ +::: pyxecm.coreshare diff --git a/docs/pyxecm/otmm.md b/docs/pyxecm/otmm.md new file mode 100644 index 0000000..d214b7c --- /dev/null +++ b/docs/pyxecm/otmm.md @@ -0,0 +1 @@ +::: pyxecm.otmm diff --git a/docs/pyxecm/successfactors.md b/docs/pyxecm/successfactors.md new file mode 100644 index 0000000..11b1564 --- /dev/null +++ b/docs/pyxecm/successfactors.md @@ -0,0 +1 @@ +::: pyxecm.customizer.successfactors diff --git a/mkdocs.yml b/mkdocs.yml index 6e9a635..65e0286 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,6 +1,6 @@ site_name: pyxecm -site_url: https://example.com/ +site_url: https://opentext.github.io/pyxecm/ theme: name: "material" @@ -53,12 +53,16 @@ nav: - Archive Center (OTAC): pyxecm/otac.md - Intelligent Viewing (OTIV): pyxecm/otiv.md - PowerDocs (OTPD): pyxecm/otpd.md + - Media Management (OTMM): pyxecm/otmm.md + - Core Share: pyxecm/coreshare.md - Customizer Python Classes: - Payload: pyxecm/payload.md - Customizer: pyxecm/customizer.md - Browser Automation: pyxecm/browser.md - K8s: pyxecm/k8s.md - Microsoft 365: pyxecm/m365.md + - SuccessFactors: pyxecm/successfactors.md + - Salesforce: pyxecm/salesforce.md - SAP: pyxecm/sap.md - Translate: pyxecm/translate.md - Helper Python Classes: diff --git a/pyproject.toml b/pyproject.toml index 83a1031..42b6a7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,8 @@ dependencies = [ 'python-hcl2', 'xmltodict', 'lxml', + 'openpyxl', + 'pandas', ] keywords = [ "opentext", diff --git a/pyxecm/__init__.py b/pyxecm/__init__.py index 5f6da10..740a84f 100644 --- a/pyxecm/__init__.py +++ b/pyxecm/__init__.py @@ -1,6 +1,9 @@ """pyxecm - A python library to interact with Opentext Extended ECM REST API.""" + from .otac import OTAC from .otcs import OTCS from .otds import OTDS from .otiv import OTIV from .otpd import OTPD +from .otmm import OTMM +from .coreshare import CoreShare diff --git a/pyxecm/coreshare.py b/pyxecm/coreshare.py new file mode 100644 index 0000000..9847e1a --- /dev/null +++ b/pyxecm/coreshare.py 
@@ -0,0 +1,2636 @@ +""" +CoreShare Module to interact with the Core Share API +See: https://confluence.opentext.com/pages/viewpage.action?spaceKey=OTC&title=APIs+Consumption+based+on+roles + +Authentication - get Client Secrets: +1. Login to Core Share as a Tenant Admin User. +2. Navigate to the Security Page. +3. In the OAuth Confidential Clients section provide a Description and Redirect URLs. This will populate a +dialog with the Client Secret. +4. Copy the Client Secret as it will not be available anywhere once the dialog is closed. + +Class: CoreShare +Methods: + +__init__: class initializer +config: Returns config data set +credentials: Get credentials (username + password) +set_credentials: Set the credentials for Core Share based on username and password. +request_header_admin / request_header_user: Return the request headers for Core Share API calls +parse_request_response: Parse the REST API responses and convert + them to Python dict in a safe way +lookup_result_value: Lookup a property value based on a provided key / value pair in the response + properties of a Core Share REST API call +exist_result_item: Check if a dict item is in the response + of the Core Share API call +get_result_value: Check if a defined value (based on a key) is in the Core Share API response + +authenticate_admin: Authenticates as Tenant Admin at the Core Share API +authenticate_user: Authenticates as Service user at the Core Share API + +get_groups: Get Core Share groups. +add_group: Add a new Core Share group. +get_group_members: Get Core Share group members. +add_group_member: Add a Core Share user to a Core Share group. +remove_group_member: Remove a Core Share user from a Core Share group. +get_group_by_id: Get a Core Share group by its ID. +get_group_by_name: Get Core Share group by its name. +search_groups: Search Core Share group(s) by name. + +get_users: Get Core Share users. +get_user_by_id: Get a Core Share user by its ID. +get_user_by_name: Get Core Share user by its first and last name. +search_users: Search Core Share user(s) by name / property. +add_user: Add a new Core Share user. This requires a Tenant Admin authorization. +resend_user_invite: Resend the invite for a Core Share user. +update_user: Update a Core Share user. +add_user_access_role: Add an access role to a Core Share user. +remove_user_access_role: Remove an access role from a Core Share user. +update_user_access_roles: Define the access roles of a Core Share user. +update_user_password: Update the password of a Core Share user. +update_user_photo: Update the Core Share user photo. + +get_folders: Get Core Share folders under a given parent ID. +unshare_folder: Unshare Core Share folder with a given resource ID. +delete_folder: Delete Core Share folder with a given resource ID. +delete_document: Delete Core Share document with a given resource ID. +leave_share: Remove a Core Share user from a share (i.e. the user leaves the share) +stop_share: Stop a share of a user. +cleanup_user_files: Cleanup all files of a user. This handles different types of resources. +get_group_shares: Get (incoming) shares of a Core Share group. +revoke_group_share: Revoke sharing of a folder with a group. +cleanup_group_shares: Cleanup all incoming shares of a group. +""" + +__author__ = "Dr. Marc Diefenbruch" +__copyright__ = "Copyright 2024, OpenText" +__credits__ = ["Kai-Philip Gatzweiler"] +__maintainer__ = "Dr. 
Marc Diefenbruch" +__email__ = "mdiefenb@opentext.com" + +import os +import json +import logging +import urllib.parse + +import requests + +logger = logging.getLogger("pyxecm.customizer.coreshare") + +REQUEST_LOGIN_HEADERS = { + "Content-Type": "application/x-www-form-urlencoded", + "Accept": "application/json", +} + +REQUEST_TIMEOUT = 60 + + +class CoreShare(object): + """Used to retrieve and automate settings in Core Share.""" + + _config: dict + _access_token_user = None + _access_token_admin = None + + def __init__( + self, + base_url: str, + sso_url: str, + client_id: str, + client_secret: str, + username: str, + password: str, + ): + """Initialize the CoreShare object + + Args: + base_url (str): base URL of the Core Share tenant + sso_url (str): Single Sign On URL of the Core Share tenant + client_id (str): Core Share Client ID + client_secret (str): Core Share Client Secret + username (str): admin user name in Core Share + password (str): admin password in Core Share + """ + + core_share_config = {} + + # Store the credentials and parameters in a config dictionary: + core_share_config["clientId"] = client_id + core_share_config["clientSecret"] = client_secret + core_share_config["username"] = username + core_share_config["password"] = password + + # Set the Core Share URLs and REST API endpoints: + core_share_config["baseUrl"] = base_url + core_share_config["ssoUrl"] = sso_url + core_share_config["restUrlv1"] = core_share_config["baseUrl"] + "/api/v1" + core_share_config["restUrlv3"] = core_share_config["baseUrl"] + "/api/v3" + core_share_config["groupsUrl"] = core_share_config["restUrlv1"] + "/groups" + core_share_config["usersUrlv1"] = core_share_config["restUrlv1"] + "/users" + core_share_config["usersUrlv3"] = core_share_config["restUrlv3"] + "/users" + core_share_config["invitesUrl"] = core_share_config["restUrlv1"] + "/invites" + core_share_config["foldersUrlv1"] = core_share_config["restUrlv1"] + "/folders" + core_share_config["foldersUrlv3"] = core_share_config["restUrlv3"] + "/folders" + core_share_config["documentsUrlv1"] = ( + core_share_config["restUrlv1"] + "/documents" + ) + core_share_config["documentsUrlv3"] = ( + core_share_config["restUrlv3"] + "/documents" + ) + core_share_config["searchUrl"] = core_share_config["baseUrl"] + "/search/v1" + core_share_config["searchUserUrl"] = core_share_config["searchUrl"] + "/user" + core_share_config["searchGroupUrl"] = ( + core_share_config["searchUrl"] + "/user/group-all" + ) + + core_share_config["sessionsUrl"] = core_share_config["restUrlv1"] + "/sessions" + core_share_config["tokenUrl"] = ( + core_share_config["ssoUrl"] + "/otdsws/oauth2/token" + ) + + # Tenant Admin User Authentication information (Session URL): + core_share_config["authorizationUrlAdmin"] = ( + core_share_config["sessionsUrl"] + + "?client={'type':'web'}" + + "&email=" + + urllib.parse.quote(username) + + "&password=" + + urllib.parse.quote(password) + ) + + # Tenant Service User Authentication information: + core_share_config["authorizationUrlCredentials"] = ( + core_share_config["tokenUrl"] + + "?client_id=" + + client_id + + "&client_secret=" + + client_secret + + "&grant_type=client_credentials" + ) + core_share_config["authorizationUrlPassword"] = ( + core_share_config["tokenUrl"] + + "?client_id=" + + client_id + + "&client_secret=" + + client_secret + + "&grant_type=password" + + "&username=" + + urllib.parse.quote(username) + + "&password=" + 
urllib.parse.quote(password) + ) + + self._config = core_share_config + + # end method definition + + def config(self) -> dict: + """Returns the configuration dictionary + + Returns: + dict: Configuration dictionary + """ + return self._config + + # end method definition + + def credentials(self) -> dict: + """Get credentials (username + password) + + Returns: + dict: dictionary with username and password + """ + return { + "username": self.config()["username"], + "password": self.config()["password"], + } + + # end method definition + + def set_credentials(self, username: str = "admin", password: str = ""): + """Set the credentials for Core Share based on username and password. + + Args: + username (str, optional): Username. Defaults to "admin". + password (str, optional): Password of the user. Defaults to "". + """ + + logger.info("Change Core Share credentials to user -> %s...", username) + + self.config()["username"] = username + self.config()["password"] = password + + # As the Authorization URLs include username and password + # we have to update them as well: + self.config()["authorizationUrlAdmin"] = ( + self.config()["sessionsUrl"] + + "?client={'type':'web'}" + + "&email=" + + urllib.parse.quote(username) + + "&password=" + + urllib.parse.quote(password) + ) + + self.config()["authorizationUrlPassword"] = ( + self.config()["tokenUrl"] + + "?client_id=" + + self.config()["clientId"] + + "&client_secret=" + + self.config()["clientSecret"] + + "&grant_type=password" + + "&username=" + + urllib.parse.quote(username) + + "&password=" + + urllib.parse.quote(password) + ) + + # end method definition + + def request_header_admin(self, content_type: str = "application/json") -> dict: + """Returns the request header for Core Share API calls authenticated as Tenant Admin. + Consists of Bearer access token and Content Type + + Args: + content_type (str, optional): content type for the request + Return: + dict: request header values + """ + + request_header = { + "Authorization": "Bearer {}".format(self._access_token_admin), + } + if content_type: + request_header["Content-Type"] = content_type + + return request_header + + # end method definition + + def request_header_user(self, content_type: str = "application/json") -> dict: + """Returns the request header for Core Share API calls authenticated as Tenant Service User. + Consists of Bearer access token and Content Type + + Args: + content_type (str, optional): content type for the request + Return: + dict: request header values + """ + + request_header = { + "Authorization": "Bearer {}".format(self._access_token_user), + } + if content_type: + request_header["Content-Type"] = content_type + + return request_header + + # end method definition + + def parse_request_response( + self, + response_object: requests.Response, + additional_error_message: str = "", + show_error: bool = True, + ) -> dict | None: + """Converts the request response (JSon) to a Python dict in a safe way + that also handles exceptions. It first tries to load the response.text + via json.loads(), which produces a dict output. Only if response.text is + not set or empty does it convert the response_object to a dict using 
+ + Args: + response_object (object): this is the response object delivered by the request call + additional_error_message (str, optional): use a more specific error message + in case of an error + show_error (bool): True: write an error to the log file + False: write a warning to the log file + Returns: + dict: response information or None in case of an error + """ + + if not response_object: + return None + + try: + if response_object.text: + dict_object = json.loads(response_object.text) + else: + dict_object = vars(response_object) + except json.JSONDecodeError as exception: + if additional_error_message: + message = "Cannot decode response as JSON. {}; error -> {}".format( + additional_error_message, exception + ) + else: + message = "Cannot decode response as JSON; error -> {}".format( + exception + ) + if show_error: + logger.error(message) + else: + logger.warning(message) + return None + else: + return dict_object + + # end method definition + + def lookup_result_value( + self, response: dict, key: str, value: str, return_key: str + ) -> str | None: + """Lookup a property value based on a provided key / value pair in the + response properties of a Core Share REST API call. + + Args: + response (dict): REST response from a Core Share REST call + key (str): property name (key) + value (str): value to find in the item with the matching key + return_key (str): determines which value to return based on the name of the dict key + Returns: + str: value of the property with the key defined in "return_key" + or None if the lookup fails + """ + + if not response: + return None + if not "results" in response: + return None + + results = response["results"] + + if not results or not isinstance(results, list): + return None + + for result in results: + if key in result and result[key] == value and return_key in result: + return result[return_key] + return None + + # end method definition + + def exist_result_item( + self, response: dict, key: str, value: str, results_marker: str = "results" + ) -> bool: + """Check existence of key / value pair in the response properties of a Core Share API call. + + Args: + response (dict): REST response from a Core Share API call + key (str): property name (key) + value (str): value to find in the item with the matching key + results_marker (str, optional): name of the response key that holds the + results list. Defaults to "results". + Returns: + bool: True if the value was found, False otherwise + """ + + if not response: + return False + + if results_marker in response: + results = response[results_marker] + if not results or not isinstance(results, list): + return False + + for result in results: + if value == result[key]: + return True + else: + if not key in response: + return False + if value == response[key]: + return True + + return False + + # end method definition + + def get_result_value( + self, + response: dict | list, + key: str, + index: int = 0, + ) -> str | None: + """Get value of a result property with a given key of a Core Share API call. + + Args: + response (dict or list): REST response from a Core Share REST Call + key (str): property name (key) + index (int, optional): Index to use (1st element has index 0). + Defaults to 0. + Returns: + str: value for the key, None otherwise + """ + + if not response: + return None + + # response is mostly a dictionary but in some cases also a list (e.g.
add_group_member()) + if isinstance(response, list): + if len(response) - 1 < index: + return None + if not key in response[index]: + return None + value = response[index][key] + return value + + if isinstance(response, dict): + # Does response have a "results" substructure? + if "results" in response: + # we expect results to be a list! + values = response["results"] + if ( + not values + or not isinstance(values, list) + or len(values) - 1 < index + ): + return None + if not key in values[index]: + return None + value = values[index][key] + else: # simple response as dictionary - try to find key in response directly: + if not key in response: + return None + value = response[key] + + return value + + return None + + # end method definition + + def authenticate_admin( + self, + revalidate: bool = False, + ) -> str | None: + """Authenticate at Core Share as Tenant Admin. + + Args: + revalidate (bool, optional): determines if a re-authentication is enforced + (e.g. if session has timed out with 401 error) + Returns: + str: Access token. Also stores access token in self._access_token_admin. None in case of error + """ + + # Already authenticated and session still valid? + if self._access_token_admin and not revalidate: + logger.debug( + "Session still valid - return existing access token -> %s", + str(self._access_token_admin), + ) + return self._access_token_admin + + request_url = self.config()["authorizationUrlAdmin"] + + request_header = REQUEST_LOGIN_HEADERS + + logger.debug("Requesting Core Share Admin Access Token from -> %s", request_url) + + response = None + self._access_token_admin = None + + try: + response = requests.post( + request_url, + headers=request_header, + timeout=REQUEST_TIMEOUT, + ) + except requests.exceptions.ConnectionError as exception: + logger.warning( + "Unable to connect to -> %s : %s", + request_url, + exception, + ) + return None + + if response.ok: + authenticate_dict = self.parse_request_response(response) + if not authenticate_dict: + return None + else: + cookies = response.cookies + if "AccessToken" in cookies: + access_token = cookies["AccessToken"] + + # String manipulation to extract pure AccessToken + if access_token.startswith("s%3A"): + access_token = access_token[4:] + access_token = access_token.rsplit(".", 1)[0] + + # Store authentication access_token: + self._access_token_admin = access_token + logger.debug( + "Tenant Admin Access Token -> %s", self._access_token_admin + ) + else: + return None + else: + logger.error( + "Failed to request a Core Share Tenant Admin Access Token; error -> %s", + response.text, + ) + return None + + return self._access_token_admin + + # end method definition + + def authenticate_user( + self, revalidate: bool = False, grant_type: str = "password" + ) -> str | None: + """Authenticate at Core Share as Tenant Service User (TSU) with client ID and client secret. + + Args: + revalidate (bool, optional): determines if a re-authentication is enforced + (e.g. if session has timed out with 401 error) + grant_type (str, optional): Can either be "client_credentials" or "password" (default). + Returns: + str: Access token. Also stores access token in self._access_token_user. None in case of error + """ + + # Already authenticated and session still valid?
+ if self._access_token_user and not revalidate: + logger.debug( + "Session still valid - return existing access token -> %s", + str(self._access_token_user), + ) + return self._access_token_user + + if grant_type == "client_credentials": + request_url = self.config()["authorizationUrlCredentials"] + elif grant_type == "password": + request_url = self.config()["authorizationUrlPassword"] + else: + logger.error("Illegal grant type - authorization not possible!") + return None + + request_header = REQUEST_LOGIN_HEADERS + + logger.debug( + "Requesting Core Share Tenant Service User Access Token from -> %s", + request_url, + ) + + response = None + self._access_token_user = None + + try: + response = requests.post( + request_url, + headers=request_header, + timeout=REQUEST_TIMEOUT, + ) + except requests.exceptions.ConnectionError as exception: + logger.warning( + "Unable to connect to -> %s : %s", + request_url, + exception, + ) + return None + + if response.ok: + authenticate_dict = self.parse_request_response(response) + if not authenticate_dict: + return None + else: + # Store authentication access_token: + self._access_token_user = authenticate_dict["access_token"] + logger.debug( + "Tenant Service User Access Token -> %s", self._access_token_user + ) + else: + logger.error( + "Failed to request a Core Share Tenant Service User Access Token; error -> %s", + response.text, + ) + return None + + return self._access_token_user + + # end method definition + + def get_groups(self, offset: int = 0, count: int = 25) -> dict | None: + """Get Core Share groups. + + Args: + offset (int, optional): index of first group (for pagination). Defaults to 0. + count (int, optional): number of groups to return (page length). Defaults to 25. + + Returns: + dict | None: Dictionary with the Core Share group data or None if the request fails. + + Example response: + { + '_links': { + 'self': {'href': '/api/v1/groups?offset=undefined&count=25'}, + 'next': {'href': '/api/v1/groups?offset=NaN&count=25'} + }, + 'results': [ + { + 'id': '2593534258421173790', + 'type': 'group', + 'tenantId': '2157293035593927996', + 'displayName': 'Innovate', + 'name': 'Innovate', + 'createdAt': '2024-05-01T09:29:36.370Z', + 'uri': '/api/v1/groups/2593534258421173790', + 'imageuri': '/img/app/group-default-lrg.png', + 'thumbnailUri': '/img/app/group-default-sm.png', + 'defaultImageUri': True, + 'description': 'Demo Company Top Level Group', + 'tenantName': 'terrarium' + } + ] + } + """ + + if not self._access_token_user: + self.authenticate_user() + + request_header = self.request_header_user() + request_url = self.config()["groupsUrl"] + "?offset={}&count={}".format( + offset, count + ) + + logger.debug("Get Core Share groups; calling -> %s", request_url) + + retries = 0 + while True: + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_user(revalidate=True) + request_header = self.request_header_user() + retries += 1 + else: + logger.error( + "Failed to get Core Share groups; status -> %s; error -> %s", + response.status_code, + response.text, + ) + return None + + # end method definition + + def add_group( + self, + group_name: str, + description: str = "", + ) -> dict | None: + """Add a new Core Share group. This requires a Tenent Admin authorization. 
+ + Args: + group_name (str): Name of the new Core Share group + description (str): Description of the new Core Share group + + Returns: + dict | None: Dictionary with the Core Share Group data or None if the request fails. + + Example response: + { + "id": "2593534258421173790", + "state": "enabled", + "isEnabled": true, + "isDeleted": false, + "uri": "/api/v1/groups/2593534258421173790", + "description": "Demo Company Top Level Group", + "name": "Innovate", + "imageUri": "/img/icons/mimeIcons/mime_group32.svg", + "thumbnailUri": "/img/icons/mimeIcons/mime_group32.svg", + "defaultImageUri": true, + "memberCount": 0, + "createdAt": "2024-05-01T09:29:36.370Z", + "type": "group", + "isSync": false, + "tenantId": "2157293035593927996" + } + """ + + if not self._access_token_admin: + self.authenticate_admin() + + request_header = self.request_header_admin() + request_url = self.config()["groupsUrl"] + + payload = {"name": group_name, "description": description} + + logger.debug( + "Adding Core Share group -> %s; calling -> %s", group_name, request_url + ) + + retries = 0 + while True: + response = requests.post( + request_url, + headers=request_header, + data=json.dumps(payload), + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_admin(revalidate=True) + request_header = self.request_header_admin() + retries += 1 + else: + logger.error( + "Failed to add Core Share group -> %s; status -> %s; error -> %s", + group_name, + response.status_code, + response.text, + ) + return None + + # end method definition + + def get_group_members(self, group_id: str) -> dict | None: + """Get Core Share group members. + + Args: + group_id (str): ID of the group to deliver the members for. + + Returns: + dict | None: Dictionary with the Core Share group membership data or None if the request fails. 
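+ + Example usage (an illustrative sketch, not from the original source; "core_share" is assumed to be an initialized CoreShare instance and the group ID is made up): + + >>> members = core_share.get_group_members(group_id="2593534258421173790") + >>> [m["displayName"] for m in members["groupMembers"]] if members else []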
+ + Example response: + { + 'groupMembers': [ + { + 'id': '2422700172682204885', + 'type': 'user', + 'tenantId': '2157293035593927996', + 'firstName': 'Andy', + 'lastName': 'Wyatt', + 'displayName': 'Andy Wyatt', + 'title': 'Buyer', + 'company': 'terrarium', + 'email': 'awyatt@M365x41497014.onmicrosoft.com', + 'otSaaSUID': 'f5a6b58e-ad43-4e2d-a3e6-5c0fcd5cd4b1', + 'otSaaSPID': 'aa49f566-0874-41e9-9924-452852ebaf7a', + 'uri': '/api/v1/users/2422700172682204885', + 'imageuri': '/img/app/profile-default-lrg.png', + 'thumbnailUri': '/img/app/topbar-profile-default-sm.png', + 'defaultImageUri': True, + 'isSpecificGroupAdmin': False + } + ], + 'pending': [ + + ], + 'count': 0 + } + """ + + if not self._access_token_admin: + self.authenticate_admin() + + request_header = self.request_header_user() + request_url = self.config()["groupsUrl"] + "/{}".format(group_id) + "/members" + + logger.debug( + "Get members for Core Share group -> %s; calling -> %s", + group_id, + request_url, + ) + + retries = 0 + while True: + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_admin(revalidate=True) + request_header = self.request_header_admin() + retries += 1 + else: + logger.error( + "Failed to get members of Core Share group -> %s; status -> %s; error -> %s", + group_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def add_group_member( + self, group_id: str, user_id: str, is_group_admin: bool = False + ) -> list | None: + """Add a Core Share user to a Core Share group. + + Args: + group_id (str): ID of the Core Share Group + user_id (str): ID of the Core Share User + + Returns: + list | None: Dictionary with the Core Share group membership or None if the request fails. 
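+ + Example usage (an illustrative sketch; the instance name and IDs are invented): + + >>> core_share.add_group_member( + ... group_id="2593534258421173790", + ... user_id="2422700172682204885", + ... is_group_admin=False, + ... )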
+ + Example Response ('errors' is only output if success = False): + [ + { + 'member': 'alewis@qa.idea-te.eimdemo.com', + 'success': True, + 'user': { + 'id': '2595801699801110696', + 'email': 'alewis@qa.idea-te.eimdemo.com', + 'otSaaSUID': '41325224-bbcf-4238-82b4-a9283be74821', + 'otSaaSPID': 'aa49f566-0874-41e9-9924-452852ebaf7a', + 'uri': '/api/v1/users/2595801699801110696', + 'tenantId': '2157293035593927996', + 'title': 'Records Manager', + 'company': 'Innovate', + 'lastName': 'Lewis', + 'firstName': 'Anne', + 'displayName': 'Lewis Anne', + 'type': 'user', + 'imageUri': 'https://core.opentext.com/api/v1/users/2595801699801110696/photo?id=0fbedc509fdfa1d27bcb5b3615714988e5f8e24598f0fc74b776ff049faef1f2', + 'thumbnailUri': 'https://core.opentext.com/api/v1/users/2595801699801110696/photo?s=small&id=0fbedc509fdfa1d27bcb5b3615714988e5f8e24598f0fc74b776ff049faef1f2', + 'defaultImageUri': False, + 'isConfirmed': True, + 'isEnabled': True + } + 'errors': [ + { + 'code': 'groupInvitationExists', + 'message': 'The user has already been invited to the group' + } + ] + } + ] + """ + + if not self._access_token_admin: + self.authenticate_admin() + + request_header = self.request_header_user() + request_url = self.config()["groupsUrl"] + "/{}".format(group_id) + "/members" + + user = self.get_user_by_id(user_id=user_id) + user_email = self.get_result_value(response=user, key="email") + + payload = {"members": [user_email], "specificGroupRole": is_group_admin} + + logger.debug( + "Add Core Share user -> %s (%s) as %s to Core Share group -> %s; calling -> %s", + user_email, + user_id, + "group member" if not is_group_admin else "group admin", + group_id, + request_url, + ) + + retries = 0 + while True: + response = requests.post( + request_url, + headers=request_header, + json=payload, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_admin(revalidate=True) + request_header = self.request_header_admin() + retries += 1 + else: + logger.error( + "Failed to add Core Share user -> %s to Core Share group -> %s; status -> %s; error -> %s", + user_id, + group_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def remove_group_member( + self, group_id: str, user_id: str, is_group_admin: bool = False + ) -> list | None: + """Remove a Core Share user from a Core Share group. + + Args: + group_id (str): ID of the Core Share Group + user_id (str): ID of the Core Share User + + Returns: + list | None: Dictionary with the Core Share group membership or None if the request fails. 
+ + Example Response ('errors' is only output if success = False): + [ + { + 'member': 'alewis@qa.idea-te.eimdemo.com', + 'success': True, + 'errors': [ + { + 'code': 'groupInvitationExists', + 'message': 'The user has already been invited to the group' + } + ] + } + ] + """ + + if not self._access_token_user: + self.authenticate_user() + + request_header = self.request_header_user() + request_url = self.config()["groupsUrl"] + "/{}".format(group_id) + "/members" + + user = self.get_user_by_id(user_id=user_id) + user_email = self.get_result_value(response=user, key="email") + + payload = {"members": [user_email], "specificGroupRole": is_group_admin} + + logger.debug( + "Remove Core Share user -> %s (%s) as %s from Core Share group -> %s; calling -> %s", + user_email, + user_id, + "group member" if not is_group_admin else "group admin", + group_id, + request_url, + ) + + retries = 0 + while True: + response = requests.delete( + request_url, + headers=request_header, + json=payload, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_user(revalidate=True) + request_header = self.request_header_user() + retries += 1 + else: + logger.error( + "Failed to remove Core Share user -> %s from Core Share group -> %s; status -> %s; error -> %s", + user_id, + group_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def get_group_by_id(self, group_id: str) -> dict | None: + """Get a Core Share group by its ID. + + Args: + group_id (str): ID of the group to retrieve. + + Returns: + dict | None: Dictionary with the Core Share group data or None if the request fails. + """ + + if not self._access_token_user: + self.authenticate_user() + + request_header = self.request_header_user() + request_url = self.config()["groupsUrl"] + "/" + group_id + + logger.debug( + "Get Core Share group with ID -> %s; calling -> %s", group_id, request_url + ) + + retries = 0 + while True: + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_user(revalidate=True) + request_header = self.request_header_user() + retries += 1 + else: + logger.error( + "Failed to get Core Share group with ID -> %s; status -> %s; error -> %s", + group_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def get_group_by_name(self, name: str) -> dict | None: + """Get Core Share group by its name. + + Args: + name (str): Name of the group to search. + + Returns: + dict | None: Dictionary with the Core Share group data or None if the request fails.
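+ + Example usage (an illustrative sketch; combines this method with get_result_value() to extract the group ID - the group name is made up): + + >>> groups = core_share.get_group_by_name(name="Innovate") + >>> group_id = core_share.get_result_value(response=groups, key="id")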
+ + Example result: + { + 'results': [ + { + 'id': '2594934169968578199', + 'type': 'group', + 'tenantId': '2157293035593927996', + 'displayName': 'Test Group', + 'name': 'Test Group', + 'createdAt': '2024-05-03T07:50:58.830Z', + 'uri': '/api/v1/groups/2594934169968578199', + 'imageuri': '/img/app/group-default-lrg.png', + 'thumbnailUri': '/img/app/group-default-sm.png', + 'defaultImageUri': True, + 'description': '', + 'tenantName': 'terrarium' + } + ], + 'total': 1 + } + """ + + groups = self.search_groups( + query_string=name, + ) + + return groups + + # end method definition + + def search_groups(self, query_string: str) -> dict | None: + """Search Core Share group(s) by name. + + Args: + query_string(str): Query for the group name / property + + Returns: + dict | None: Dictionary with the Core Share user data or None if the request fails. + + Example response: + """ + + if not self._access_token_admin: + self.authenticate_admin() + + request_header = self.request_header_admin() + request_url = self.config()["searchGroupUrl"] + "?q=" + query_string + + logger.debug( + "Search Core Share group by -> %s; calling -> %s", query_string, request_url + ) + + retries = 0 + while True: + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_admin(revalidate=True) + request_header = self.request_header_admin() + retries += 1 + else: + logger.error( + "Cannot find Core Share group with name / property -> %s; status -> %s; error -> %s", + query_string, + response.status_code, + response.text, + ) + return None + + # end method definition + + def get_users(self) -> dict | None: + """Get Core Share users. + + Args: + None + + Returns: + dict | None: Dictionary with the Core Share user data or None if the request fails. 
+ + Example response (it is a list!): + [ + { + 'id': '2400020228198108758', + 'type': 'user', + 'tenantId': '2157293035593927996', + 'firstName': 'Technical Marketing', + 'lastName': 'Service', + 'displayName': 'Technical Marketing Service', + 'title': 'Service User', + 'company': 'terrarium', + 'email': 'tm-service@opentext.com', + 'otSaaSUID': 'fdb07113-4854-4f63-a208-55759ee925ce', + 'otSaaSPID': 'aa49f566-0874-41e9-9924-452852ebaf7a', + 'state': 'enabled', + 'isEnabled': True, + 'isConfirmed': True, + 'quota': 2147483648, + 'usage': 10400, + 'uri': '/api/v1/users/2400020228198108758', + 'imageuri': '/img/app/profile-default-lrg.png', + 'thumbnailUri': '/img/app/topbar-profile-default-sm.png', + 'defaultImageUri': True, + 'rootId': '2400020231108955735', + 'userRoot': { + 'size': 0, + 'id': '2400020231108955735', + 'resourceType': 1, + 'name': 'Files', + 'createdById': '2400020228198108758', + 'created': '2023-08-08T09:31:46.654Z', + 'lastModified': '2023-09-19T15:11:56.925Z', + 'lastModifiedById': '2400020228198108758', + 'currentVersionNumber': None, + 'currentVersionId': None, + 'childCount': '4', + 'shareCount': 1, + 'deleteCount': 0, + 'trashState': 0, + 'imageId': None, + 'thumbnailId': None, + 'tedsImageId': None, + 'tedsThumbnailId': None, + 'parentId': None, + 'tagCount': 0, + 'versionCommentCount': 0, + 'draftCommentCount': 0, + 'subTypeId': None, + 'contentOriginId': None, + 'externalData': None, + 'tenantId': '2157293035593927996', + 'nodeType': 1, + 'likesCount': 0, + 'commentCount': 0, + 'createdAt': '2023-08-08T09:31:46.655Z', + 'updatedAt': '2023-09-19T15:11:56.925Z' + }, + ... + }, + ... + ] + """ + + if not self._access_token_admin: + self.authenticate_admin() + + request_header = self.request_header_admin() + request_url = self.config()["usersUrlv1"] + + logger.debug("Get Core Share users; calling -> %s", request_url) + + retries = 0 + while True: + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_admin(revalidate=True) + request_header = self.request_header_admin() + retries += 1 + else: + logger.error( + "Failed to get Core Share users; status -> %s; error -> %s", + response.status_code, + response.text, + ) + return None + + # end method definition + + def get_user_by_id(self, user_id: str) -> dict | None: + """Get a Core Share user by its ID. + + Args: + user_id (str): ID of the user to retrieve. + + Returns: + dict | None: Dictionary with the Core Share user data or None if the request fails.
+ + Response example: + { + 'accessRoles': [], + 'commentCount': 0, + 'company': 'terrarium', + 'createdAt': '2024-04-19T11:58:34.240Z', + 'defaultImageUri': True, + 'disabledAt': None, + 'displayName': 'Sato Ken', + 'email': 'ksato@idea-te.eimdemo.com', + 'firstName': 'Ken', + 'id': '2584911925942946703', + 'otSaaSUID': '6cab5035-abbc-481c-b049-10b4efae7408', + 'otSaaSPID': 'aa49f566-0874-41e9-9924-452852ebaf7a', + 'imageUri': 'https://core.opentext.com/img/app/profile-default-lrg.png', + 'invitedAt': '2024-04-19T11:58:36.307Z', + 'isAdmin': False, + 'isConfirmed': True, + 'isEnabled': True, + 'isSync': False, + 'lastLoginDate': -1, + 'lastName': 'Sato', + 'likesCount': 0, + 'rootId': '2584911935422073756', + 'state': 'enabled', + 'stateChanged': '2024-04-19T12:03:23.736Z', + 'tenantId': '2157293035593927996', + 'thumbnailUri': 'https://core.opentext.com/img/app/topbar-profile-default-sm.png', + 'title': 'Real Estate Manager', + 'type': 'user', + 'updatedAt': '2024-04-19T12:03:23.731Z', + 'uri': '/api/v1/users/2584911925942946703', + 'userRoot': { + 'size': 0, + 'id': '2584911935422073756', + 'resourceType': 1, + 'name': 'Files', + 'createdById': '2584911925942946703', + 'created': '2024-04-19T11:58:35.370Z', + 'lastModified': '2024-04-19T11:58:35.370Z', + 'lastModifiedById': '2584911925942946703', + 'currentVersionNumber': None, + 'currentVersionId': None, + 'childCount': '0', + 'shareCount': 1, + 'deleteCount': 0, + 'trashState': 0, + 'imageId': None, + 'thumbnailId': None, + 'tedsImageId': None, + 'tedsThumbnailId': None, + 'parentId': None, + ... + }, + 'hasRequestedDelete': False, + 'defaultBaseUrl': 'https://core.opentext.com', + 'quota': 10737418240, + 'usage': 0 + } + """ + + if not self._access_token_user: + self.authenticate_user() + + request_header = self.request_header_user() + request_url = self.config()["usersUrlv1"] + "/" + user_id + + logger.debug( + "Get Core Share user with ID -> %s; calling -> %s", user_id, request_url + ) + + retries = 0 + while True: + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_user(revalidate=True) + request_header = self.request_header_user() + retries += 1 + else: + logger.error( + "Failed to get Core Share user with ID -> %s; status -> %s; error -> %s", + user_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def get_user_by_name( + self, first_name: str, last_name: str, user_status: str = "internal-native" + ) -> dict | None: + """Get Core Share user by its first and last name. + + Args: + first_name (str): First name of the users to search. + last_name (str): Last name of the users to search. + user_status (str, optional): type of users. Possible values: + * internal-enabled + * internal-pending + * internal-locked + * internal-native (non-SSO) + * internal-sso + + Returns: + dict | None: Dictionary with the Core Share user data or None if the request fails. + """ + + # Search the users with this first and last name (and hope this is unique ;-). + users = self.search_users( + query_string=first_name + " " + last_name, + user_status=user_status, + ) + + return users + + # end method definition + + def get_user_by_email( + self, email: str, user_status: str = "internal-native" + ) -> dict | None: + """Get Core Share user by its email address. 
+ + Args: + email (str): Email address of the users to search. + user_status (str, optional): type of users. Possible values: + * internal-enabled + * internal-pending + * internal-locked + * internal-native (non-SSO) + * internal-sso + + Returns: + dict | None: Dictionary with the Core Share user data or None if the request fails. + """ + + # Search the users with this first and last name (and hope this is unique ;-). + users = self.search_users( + query_string=email, + user_status=user_status, + ) + + return users + + # end method definition + + def search_users( + self, + query_string: str, + user_status: str = "internal-native", + page_size: int = 100, + ) -> dict | None: + """Search Core Share user(s) by name / property. Needs to be a Tenant Administrator to do so. + + Args: + query_string (str): string to query the user(s) + user_status (str, optional): type of users. Possible values: + * internal-enabled + * internal-pending + * internal-locked + * internal-native (non-SSO) + * internal-sso + page_size (int, optional): max number of results per page. We set the default to 100 (Web UI uses 25) + + Returns: + dict | None: Dictionary with the Core Share user data or None if the request fails. + + Example response: + { + "results": [ + { + "id": "2422698421996494632", + "type": "user", + "tenantId": "2157293035593927996", + "firstName": "Andy", + "lastName": "Wyatt", + "displayName": "Andy Wyatt", + "title": "Buyer", + "company": "terrarium", + "email": "awyatt@M365x46777101.onmicrosoft.com", + "otSaaSUID": "0842d1e1-acfc-425b-994a-e2dcb4d333c6", + "otSaaSPID": "aa49f566-0874-41e9-9924-452852ebaf7a", + "state": "enabled", + "isEnabled": true, + "isConfirmed": true, + "isAdmin": false, + "accessRoles": [], + "hasBeenDelegated": null, + "createdAt": "2023-09-08T16:29:17.680Z", + "lastLoginDate": "2023-10-05T16:14:16Z", + "quota": 1073741824, + "usage": 0, + "rootId": "2422698425964306217", + "uri": "/api/v1/users/2422698421996494632", + "imageuri": "/img/app/profile-default-lrg.png", + "thumbnailUri": "/img/app/topbar-profile-default-sm.png", + "defaultImageUri": true + }, + ... + ] + } + """ + + if not self._access_token_admin: + self.authenticate_admin() + + request_header = self.request_header_admin() + request_url = ( + self.config()["searchUserUrl"] + + "/{}".format(user_status) + + "?q=" + + query_string + + "&pageSize=" + + str(page_size) + ) + + logger.debug( + "Search Core Share user by -> %s; calling -> %s", query_string, request_url + ) + + retries = 0 + while True: + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_admin(revalidate=True) + request_header = self.request_header_admin() + retries += 1 + else: + logger.error( + "Failed to search Core Share user with name / property -> %s; status -> %s; error -> %s", + query_string, + response.status_code, + response.text, + ) + return None + + # end method definition + + def add_user( + self, + first_name: str, + last_name: str, + email: str, + password: str | None = None, + title: str | None = None, + company: str | None = None, + ) -> dict | None: + """Add a new Core Share user. This requires a Tenent Admin authorization. 
+ + Args: + first_name (str): First name of the new user + last_name (str): Last name of the new user + email (str): Email of the new Core Share user + password (str | None, optional): Password of the new Core Share user + title (str | None, optional): Title of the user + company (str | None, optional): Name of the Company of the user + + Returns: + dict | None: Dictionary with the Core Share User data or None if the request fails. + + Example response: + { + "accessRoles": [], + "commentCount": 0, + "company": "terrarium", + "createdAt": "2024-05-01T09:43:22.962Z", + "defaultImageUri": true, + "disabledAt": null, + "displayName": "Tester Theo", + "email": "theo@tester.com", + "firstName": "Theo", + "id": "2593541192377435562", + "otSaaSUID": "77043e17-105c-418f-b4ba-1bef9f15937c", + "otSaaSPID": "aa49f566-0874-41e9-9924-452852ebaf7a", + "imageUri": "https://core.opentext.com/img/app/profile-default-lrg.png", + "invitedAt": "2024-05-01T09:43:23.658Z", + "isAdmin": false, + "isConfirmed": false, + "isEnabled": true, + "isSync": false, + "lastLoginDate": -1, + "lastName": "Tester", + "likesCount": 0, + "rootId": "2593541195170842028", + "state": "pending", + "stateChanged": "2024-05-01T09:43:22.959Z", + "tenantId": "2157293035593927996", + "thumbnailUri": "https://core.opentext.com/img/app/topbar-profile-default-sm.png", + "title": "VP Product Management", + "type": "user", + "updatedAt": "2024-05-01T09:43:23.658Z", + "uri": "/api/v1/users/2593541192377435562", + "hasRequestedDelete": false, + "defaultBaseUrl": "https://core.opentext.com", + "quota": 10737418240, + "usage": 0 + } + """ + + if not self._access_token_admin: + self.authenticate_admin() + + # here we want the request to determine the content type automatically: + request_header = self.request_header_admin(content_type="") + request_url = self.config()["invitesUrl"] + + payload = { + "firstName": first_name, + "lastName": last_name, + "email": email, + "quota": 10737418240, + } + if password: + payload["password"] = password + if title: + payload["title"] = title + if company: + payload["company"] = company + + logger.debug( + "Adding Core Share user -> %s %s; calling -> %s", + first_name, + last_name, + request_url, + ) + + retries = 0 + while True: + response = requests.post( + request_url, + headers=request_header, + json=payload, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_admin(revalidate=True) + request_header = self.request_header_admin() + retries += 1 + else: + logger.error( + "Failed to add Core Share user -> %s %s (%s); status -> %s; error -> %s", + first_name, + last_name, + email, + response.status_code, + response.text, + ) + return None + + # end method definition + + def resend_user_invite(self, user_id: str) -> dict: + """Resend the invite for a Core Share user. + + Args: + user_id (str): The Core Share user ID. + + Returns: + dict: Response from the Core Share API. 
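+ + Example usage (an illustrative sketch; the user ID is invented): + + >>> core_share.resend_user_invite(user_id="2593541192377435562")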
+ """ + + if not self._access_token_admin: + self.authenticate_admin() + + request_header = self.request_header_admin() + + request_url = self.config()["usersUrlv1"] + "/{}".format(user_id) + + logger.debug( + "Resend invite for Core Share user with ID -> %s; calling -> %s", + user_id, + request_url, + ) + + update_data = {"resend": True} + + retries = 0 + while True: + response = requests.put( + request_url, + json=update_data, + headers=request_header, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Admin Session has expired - try to re-authenticate...") + self.authenticate_admin(revalidate=True) + request_header = self.request_header_admin() + retries += 1 + else: + logger.error( + "Failed to resend invite for Core Share user -> %s; status -> %s; error -> %s", + user_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def update_user(self, user_id: str, update_data: dict) -> dict: + """Update a Core Share user. + + Args: + user_id (str): ID of the Core Share user. + + Returns: + dict: Response or None if the request has failed. + """ + + if not self._access_token_admin: + self.authenticate_admin() + + request_header = self.request_header_admin() + + request_url = self.config()["usersUrlv1"] + "/{}".format(user_id) + + logger.debug( + "Update data of Core Share user with ID -> %s; calling -> %s", + user_id, + request_url, + ) + + if "email" in update_data and not "password" in update_data: + logger.warning( + "Trying to update the email without providing the password. This is likely to fail..." + ) + + retries = 0 + while True: + response = requests.put( + request_url, + json=update_data, + headers=request_header, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Admin Session has expired - try to re-authenticate...") + self.authenticate_admin(revalidate=True) + request_header = self.request_header_admin() + retries += 1 + else: + logger.error( + "Failed to update Core Share user -> %s; status -> %s; error -> %s", + user_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def add_user_access_role(self, user_id: str, role_id: int) -> dict: + """Add an access role to a Core Share user. + + Args: + user_id (str): The Core Share user ID. + role_id (int): The role ID: + * Content Manager = 5 + * Group Admin = 3 + + Returns: + dict: Response from the Core Share API. 
+ """ + + if not self._access_token_admin: + self.authenticate_admin() + + request_header = self.request_header_admin() + + request_url = ( + self.config()["usersUrlv1"] + + "/{}".format(user_id) + + "/roles/" + + str(role_id) + ) + + logger.debug( + "Add access role -> %s to Core Share user with ID -> %s; calling -> %s", + str(role_id), + user_id, + request_url, + ) + + retries = 0 + while True: + response = requests.put( + request_url, + # json=update_data, + headers=request_header, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Admin Session has expired - try to re-authenticate...") + self.authenticate_admin(revalidate=True) + request_header = self.request_header_admin() + retries += 1 + else: + logger.error( + "Failed to add access role -> %s to Core Share user -> %s; status -> %s; error -> %s", + str(role_id), + user_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def remove_user_access_role(self, user_id: str, role_id: int) -> dict: + """Remove an access role from a Core Share user. + + Args: + user_id (str): The Core Share user ID. + role_id (int): The role ID: + * Content Manager = 5 + * Group Admin = 3 + + Returns: + dict: Response from the Core Share API. + """ + + if not self._access_token_admin: + self.authenticate_admin() + + request_header = self.request_header_admin() + + request_url = ( + self.config()["usersUrlv1"] + + "/{}".format(user_id) + + "/roles/" + + str(role_id) + ) + + logger.debug( + "Remove access role -> %s from Core Share user with ID -> %s; calling -> %s", + str(role_id), + user_id, + request_url, + ) + + retries = 0 + while True: + response = requests.delete( + request_url, + # json=update_data, + headers=request_header, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Admin Session has expired - try to re-authenticate...") + self.authenticate_admin(revalidate=True) + request_header = self.request_header_admin() + retries += 1 + else: + logger.error( + "Failed to remove access role -> %s from Core Share user -> %s; status -> %s; error -> %s", + str(role_id), + user_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def update_user_access_roles( + self, + user_id: str, + is_admin: bool | None = None, + is_content_manager: bool | None = None, + is_group_admin: bool | None = None, + ) -> dict: + """Define the access roles of a Core Share user. + + Args: + user_id (str): ID of the Core Share user + is_content_manager (bool | None, optional): Assign Content Manager Role if True. + Removes Content Manager Role if False. + Does nothing if None. + Defaults to None. + is_group_admin (bool | None, optional): Assign Group Admin Role if True. + Removes Group Admin Role if False. + Does nothing if None. + Defaults to None. + is_admin (bool | None, optional): Makes user Admin if True. + Removes Admin rights if False. + Does nothing if None. + Defaults to None. + + Returns: + dict: Response from the Core Share API. + """ + + CONTENT_MANAGER_ROLE_ID = 5 + GROUP_ADMIN_ROLE_ID = 3 + + response = None + + # Admins don't have/need specific access roles. They are controled by isAdmin flag. 
+ if is_admin is not None: + update_data = {} + update_data["isAdmin"] = is_admin + response = self.update_user(user_id=user_id, update_data=update_data) + + # Only for non-admins the other two roles are usable: + if is_content_manager is not None: + if is_content_manager: + response = self.add_user_access_role( + user_id=user_id, role_id=CONTENT_MANAGER_ROLE_ID + ) + else: + response = self.remove_user_access_role( + user_id=user_id, role_id=CONTENT_MANAGER_ROLE_ID + ) + + if is_group_admin is not None: + if is_group_admin: + response = self.add_user_access_role( + user_id=user_id, role_id=GROUP_ADMIN_ROLE_ID + ) + else: + response = self.remove_user_access_role( + user_id=user_id, role_id=GROUP_ADMIN_ROLE_ID + ) + + return response + + # end method definition + + def update_user_password( + self, user_id: str, password: str, new_password: str + ) -> dict: + """Update the password of a Core Share user. + + Args: + user_id (str): The Core Share user ID. + password (str): Old user password. + new_password (str): New user password. + + Returns: + dict: Response from the Core Share API. + """ + + if not self._access_token_admin: + self.authenticate_admin() + + request_header = self.request_header_admin() + + request_url = self.config()["usersUrlv1"] + "/{}".format(user_id) + + logger.debug( + "Update password of Core Share user with ID -> %s; calling -> %s", + user_id, + request_url, + ) + + update_data = {"password": password, "newpassword": new_password} + + retries = 0 + while True: + response = requests.put( + request_url, + json=update_data, + headers=request_header, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Admin Session has expired - try to re-authenticate...") + self.authenticate_admin(revalidate=True) + request_header = self.request_header_admin() + retries += 1 + else: + logger.error( + "Failed to update password of Core Share user -> %s; status -> %s; error -> %s", + user_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def update_user_photo( + self, + user_id: str, + photo_path: str, + ) -> dict | None: + """Update the Core Share user photo. + + Args: + user_id (str): Core Share ID of the user + photo_path (str): file system path with the location of the photo + Returns: + dict | None: Dictionary with the Core Share User data or None if the request fails. 
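+ + Example usage (an illustrative sketch; the instance name, user ID and photo path are assumptions): + + >>> core_share.update_user_photo( + ... user_id="2584911925942946703", + ... photo_path="/tmp/photos/ksato.jpg", + ... )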
+ """ + + if not self._access_token_user: + self.authenticate_user() + + # Check if the photo file exists + if not os.path.isfile(photo_path): + logger.error("Photo file -> %s not found!", photo_path) + return None + + try: + # Read the photo file as binary data + with open(photo_path, "rb") as image_file: + photo_data = image_file.read() + except OSError as exception: + # Handle any errors that occurred while reading the photo file + logger.error( + "Error reading photo file -> %s; error -> %s", photo_path, exception + ) + return None + + request_url = self.config()["usersUrlv3"] + "/{}".format(user_id) + "/photo" + files = { + "file": (photo_path, photo_data, "image/jpeg"), + } + + logger.debug( + "Update profile photo of Core Share user with ID -> %s; calling -> %s", + user_id, + request_url, + ) + + retries = 0 + while True: + response = requests.post( + request_url, + files=files, + headers=self.request_header_user(content_type=""), + verify=False, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_user(revalidate=True) + retries += 1 + else: + logger.error( + "Failed to update profile photo of Core Share user with ID -> %s; status -> %s; error -> %s", + user_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def get_folders(self, parent_id: str) -> list | None: + """Get Core Share folders under a given parent ID. This runs under user credentials (not admin!) + + Args: + parent_id (str): ID of the parent folder or the rootID of a user + + Returns: + list | None: List with the Core Share folders data or None if the request fails. 
+ + Example response (it is a list!): + [ + { + 'id': '2599466250228733940', + 'name': 'Global Trade AG (50031)', + 'size': 0, + 'created': '2024-05-09T13:55:24.899Z', + 'lastModified': '2024-05-09T13:55:33.069Z', + 'shareCount': 2, + 'isShared': True, + 'parentId': '2599466244163770353', + 'uri': '/api/v1/folders/2599466250228733940', + 'commentCount': 0, + 'isDeleted': False, + 'isLiked': False, + 'likesCount': 0, + 'locks': [], + 'createdBy': { + 'id': '2597156105373095597', + 'email': '6ccf1cb3-177e-4930-8baf-2d421cf92a5f', + 'uri': '/api/v1/users/2597156105373095597', + 'tenantId': '2595192600759637225', + 'tier': 'tier3', + 'title': '', + 'company': '', + 'lastName': '', + 'firstName': 'OpenText Service User', + 'displayName': 'OpenText Service User', + 'type': 'user', + 'imageUri': 'https://core.opentext.com/img/app/profile-default-lrg.png', + 'thumbnailUri': 'https://core.opentext.com/img/app/topbar-profile-default-sm.png', + 'defaultImageUri': True, + 'isConfirmed': True, + 'isEnabled': True + }, + 'lastModifiedBy': {...}, + 'owner': {...}, + 'permission': 1, + 'hasAttachments': False, + 'resourceType': 'folder', + 'tagCount': 0, + 'resourceSubType': {}, + 'contentOriginId': '0D949C67-473D-448C-8F4B-B2CCA769F586', + 'externalData': None, + 'childCount': 7, + 'contentOriginator': { + 'id': '0D949C67-473D-448C-8F4B-B2CCA769F586', + 'name': 'IDEA-TE-QA', + 'imageUri': '/api/v1/tenants/2595192600759637225/contentOriginator/images/0D949C67-473D-448C-8F4B-B2CCA769F586' + } + } + ] + """ + + if not self._access_token_user: + self.authenticate_user() + + request_header = self.request_header_user() + request_url = ( + self.config()["foldersUrlv1"] + + "/{}".format(parent_id) + + "/children" + + "?limit=25&order=lastModified:desc&filter=any" + ) + + logger.debug( + "Get Core Share folders under parent -> %s; calling -> %s", + parent_id, + request_url, + ) + + retries = 0 + while True: + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_user(revalidate=True) + request_header = self.request_header_user() + retries += 1 + else: + logger.error( + "Failed to get Core Share folders under parent -> %s; status -> %s; error -> %s", + parent_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def unshare_folder(self, resource_id: str) -> dict | None: + """Unshare Core Share folder with a given resource ID. + + Args: + resource_id (str): ID of the folder (resource) to unshare with all collaborators + + Returns: + dict | None: Dictionary with the Core Share folders data or None if the request fails. 
+ + Example response (it is a list!): + """ + + if not self._access_token_user: + self.authenticate_user() + + request_header = self.request_header_user() + request_url = ( + self.config()["foldersUrlv1"] + "/{}".format(resource_id) + "/collaborators" + ) + + logger.debug( + "Unshare Core Share folder -> %s; calling -> %s", + resource_id, + request_url, + ) + + retries = 0 + while True: + response = requests.delete( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_user(revalidate=True) + request_header = self.request_header_user() + retries += 1 + else: + logger.error( + "Failed to unshare Core Share folder -> %s; status -> %s; error -> %s", + resource_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def delete_folder(self, resource_id: str) -> dict | None: + """Delete Core Share folder with a given resource ID. + + Args: + resource_id (str): ID of the folder (resource) to delete + + Returns: + dict | None: Dictionary with the Core Share request data or None if the request fails. + + Example response (it is a list!): + """ + + if not self._access_token_user: + self.authenticate_user() + + request_header = self.request_header_user() + request_url = self.config()["foldersUrlv1"] + "/{}".format(resource_id) + + payload = {"state": "deleted"} + + logger.debug( + "Delete Core Share folder -> %s; calling -> %s", + resource_id, + request_url, + ) + + retries = 0 + while True: + response = requests.put( + request_url, + headers=request_header, + data=json.dumps(payload), + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_user(revalidate=True) + request_header = self.request_header_user() + retries += 1 + else: + logger.error( + "Failed to delete Core Share folder -> %s; status -> %s; error -> %s", + resource_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def delete_document(self, resource_id: str) -> dict | None: + """Delete Core Share document with a given resource ID. + + Args: + resource_id (str): ID of the document (resource) to delete + + Returns: + dict | None: Dictionary with the Core Share request data or None if the request fails. 
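+ + Example usage (an illustrative sketch; the resource ID is invented): + + >>> core_share.delete_document(resource_id="2599466250228733940")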
+ """ + + if not self._access_token_user: + self.authenticate_user() + + request_header = self.request_header_user() + request_url = self.config()["documentsUrlv1"] + "/{}".format(resource_id) + + payload = {"state": "deleted"} + + logger.debug( + "Delete Core Share document -> %s; calling -> %s", + resource_id, + request_url, + ) + + retries = 0 + while True: + response = requests.put( + request_url, + headers=request_header, + data=json.dumps(payload), + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_user(revalidate=True) + request_header = self.request_header_user() + retries += 1 + else: + logger.error( + "Failed to delete Core Share document -> %s; status -> %s; error -> %s", + resource_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def leave_share(self, user_id: str, resource_id: str) -> dict | None: + """Remove a Core Share user from a share (i.e. the user leaves the share). + + Args: + user_id (str): Core Share ID of the user. + resource_id (str): Core Share ID of the shared folder. + + Returns: + dict | None: Response of the REST call or None in case of an error. + """ + + if not self._access_token_user: + self.authenticate_user() + + request_header = self.request_header_user() + + request_url = ( + self.config()["foldersUrlv1"] + + "/{}".format(resource_id) + + "/collaborators/" + + str(user_id) + ) + + payload = {"action": "LEAVE_SHARE"} + + logger.debug( + "User -> %s leaves Core Share shared folder -> %s; calling -> %s", + user_id, + resource_id, + request_url, + ) + + retries = 0 + while True: + response = requests.delete( + request_url, + headers=request_header, + data=json.dumps(payload), + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_user(revalidate=True) + request_header = self.request_header_user() + retries += 1 + else: + logger.error( + "Failed to leave Core Share folder -> %s; status -> %s; error -> %s", + resource_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def stop_share(self, user_id: str, resource_id: str) -> dict | None: + """Stop a share of a user. + + Args: + user_id (str): Core Share ID of the user. + resource_id (str): Core Share ID of the shared folder. + + Returns: + dict | None: Response of the REST call or None in case of an error.
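+ + Example usage (an illustrative sketch; both IDs are invented): + + >>> core_share.stop_share( + ... user_id="2595801699801110696", + ... resource_id="2599466250228733940", + ... )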
+ """ + + if not self._access_token_user: + self.authenticate_user() + + request_header = self.request_header_user() + + request_url = ( + self.config()["foldersUrlv1"] + "/{}".format(resource_id) + "/collaborators" + ) + + logger.debug( + "User -> %s stops sharing Core Share shared folder -> %s; calling -> %s", + user_id, + resource_id, + request_url, + ) + + retries = 0 + while True: + response = requests.delete( + request_url, + headers=request_header, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_user(revalidate=True) + request_header = self.request_header_user() + retries += 1 + else: + logger.error( + "Failed to stop sharing Core Share folder -> %s; status -> %s; error -> %s", + resource_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def cleanup_user_files( + self, user_id: str, user_login: str, user_password: str + ) -> bool: + """Cleanup files of a user. This handles different types of resources. + * Local resources - not shared + * Resources shared by the user + * Resources shared by other users or groups + This method inpersonate as the user. Only the user can delete its folders. + The Core Share admin is not entitled to do this. + + Args: + user_id (str): Core Share ID of the user + user_login (str): Core Share email (= login) of the user + user_password (str): Core Share password of the user + + Returns: + bool: True = success, False in case of an error. + """ + + user = self.get_user_by_id(user_id=user_id) + user_id = self.get_result_value(user, "id") + user_root_folder_id = self.get_result_value(user, "rootId") + + is_confirmed = self.get_result_value(response=user, key="isConfirmed") + if not is_confirmed: + logger.info( + "User -> %s is not yet confirmed - so it cannot have files to cleanup.", + user_id, + ) + return True + + logger.info("Inpersonate as user -> %s to cleanup files...", user_login) + + # Save admin credentials the class has been initialized with: + admin_credentials = self.credentials() + + # Change the credentials to the user owning the file - admin + # is not allowed to see user files! 
self.set_credentials(username=user_login, password=user_password) + + # Authenticate as given user: + self.authenticate_user(revalidate=True) + + success = True + + # Get all folders of the user: + response = self.get_folders(parent_id=user_root_folder_id) + if not response or not response["results"]: + logger.info("User -> %s has no items to clean up!", user_id) + else: + items = response["results"] + for item in items: + if item["isShared"]: + if item["owner"]["id"] == user_id: + logger.info( + "User -> %s stops sharing item -> %s (%s)...", + user_id, + item["name"], + item["id"], + ) + response = self.stop_share( + user_id=user_id, resource_id=item["id"] + ) + if not response: + success = False + logger.info( + "User -> %s deletes unshared item -> %s (%s)...", + user_id, + item["name"], + item["id"], + ) + response = self.delete_folder(item["id"]) + if not response: + success = False + else: + logger.info( + "User -> %s leaves shared folder -> '%s' (%s)...", + user_id, + item["name"], + item["id"], + ) + response = self.leave_share( + user_id=user_id, resource_id=item["id"] + ) + if not response: + success = False + else: + logger.info( + "User -> %s deletes local item -> '%s' (%s) of type -> '%s'...", + user_id, + item["name"], + item["id"], + item["resourceType"], + ) + if item["resourceType"] == "folder": + response = self.delete_folder(item["id"]) + elif item["resourceType"] == "document": + response = self.delete_document(item["id"]) + else: + logger.error( + "Unsupported resource type -> '%s'", item["resourceType"] + ) + response = None + if not response: + success = False + + logger.info( + "End impersonation and switch back to admin account -> %s...", + admin_credentials["username"], + ) + + # Reset credentials to admin: + self.set_credentials( + admin_credentials["username"], admin_credentials["password"] + ) + # Authenticate as the administrator the class has been initialized with: + self.authenticate_user(revalidate=True) + + return success + + # end method definition + + def get_group_shares(self, group_id: str) -> dict | None: + """Get (incoming) shares of a Core Share group. + + Args: + group_id (str): Core Share ID of a group + + Returns: + dict | None: Incoming shares or None if the request fails. + """ + + if not self._access_token_admin: + self.authenticate_admin() + + request_header = self.request_header_admin() + + request_url = ( + self.config()["groupsUrl"] + "/{}".format(group_id) + "/shares/incoming" + ) + + logger.debug( + "Get shares of Core Share group -> %s; calling -> %s", + group_id, + request_url, + ) + + retries = 0 + while True: + response = requests.get( + request_url, + headers=request_header, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate_admin(revalidate=True) + request_header = self.request_header_admin() + retries += 1 + else: + logger.error( + "Failed to get shares of Core Share group -> %s; status -> %s; error -> %s", + group_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def revoke_group_share(self, group_id: str, resource_id: str) -> dict | None: + """Revoke sharing of a folder with a group. + + Args: + group_id (str): ID of the Core Share group + resource_id (str): ID of the Core Share folder + + Returns: + dict | None: Response or None if the request fails.
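+ + Example usage (an illustrative sketch; typically driven by get_group_shares() - the group ID is invented): + + >>> shares = core_share.get_group_shares(group_id="2594934169968578199") + >>> core_share.revoke_group_share( + ... group_id="2594934169968578199", + ... resource_id=shares["shares"][0]["id"], + ... )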
+ """
+
+ if not self._access_token_admin:
+ self.authenticate_admin()
+
+ request_header = self.request_header_admin()
+
+ request_url = (
+ self.config()["foldersUrlv1"]
+ + "/{}".format(resource_id)
+ + "/collaboratorsAsAdmin/"
+ + str(group_id)
+ )
+
+ logger.debug(
+ "Revoke sharing of folder -> %s with group -> %s; calling -> %s",
+ resource_id,
+ group_id,
+ request_url,
+ )
+
+ retries = 0
+ while True:
+ response = requests.delete(
+ request_url,
+ headers=request_header,
+ timeout=REQUEST_TIMEOUT,
+ )
+ if response.ok:
+ return self.parse_request_response(response)
+ elif response.status_code == 401 and retries == 0:
+ logger.debug("Session has expired - try to re-authenticate...")
+ self.authenticate_admin(revalidate=True)
+ request_header = self.request_header_admin()
+ retries += 1
+ else:
+ logger.error(
+ "Failed to revoke sharing Core Share folder -> %s with group -> %s; status -> %s; error -> %s",
+ resource_id,
+ group_id,
+ response.status_code,
+ response.text,
+ )
+ return None
+
+ # end method definition
+
+ def cleanup_group_shares(self, group_id: str) -> bool:
+ """Clean up all incoming shares of a group.
+ The Core Share admin is required to do this.
+
+ Args:
+ group_id (str): Core Share ID of the group
+
+ Returns:
+ bool: True = success, False in case of an error.
+ """
+
+ response = self.get_group_shares(group_id=group_id)
+
+ if not response or not response["shares"]:
+ logger.info("Group -> %s has no shares to revoke!", group_id)
+ return True
+
+ success = True
+
+ items = response["shares"]
+ for item in items:
+ logger.info(
+ "Revoke sharing of folder -> %s (%s) with group -> %s...",
+ item["name"],
+ item["id"],
+ group_id,
+ )
+ response = self.revoke_group_share(
+ group_id=group_id, resource_id=item["id"]
+ )
+ if not response:
+ success = False
+
+ return success
+
+ # end method definition
diff --git a/pyxecm/customizer/__init__.py b/pyxecm/customizer/__init__.py
index 50d0cf5..57f420e 100644
--- a/pyxecm/customizer/__init__.py
+++ b/pyxecm/customizer/__init__.py
@@ -1,8 +1,12 @@
"""PYXECM classes for Customizer"""
+from .browser_automation import BrowserAutomation
+
from .customizer import Customizer
from .k8s import K8s
from .m365 import M365
from .payload import Payload
from .sap import SAP
from .salesforce import Salesforce
+from .successfactors import SuccessFactors
+from .servicenow import ServiceNow
diff --git a/pyxecm/customizer/browser_automation.py b/pyxecm/customizer/browser_automation.py
index a359079..c3f4194 100644
--- a/pyxecm/customizer/browser_automation.py
+++ b/pyxecm/customizer/browser_automation.py
@@ -9,6 +9,7 @@
__init__ : class initializer. Start the browser session.
set_chrome_options: Sets chrome options for Selenium. Chrome options for headless browser is enabled
get_page: Load a page into the browser based on a given URL.
+find_elem: Find a page element
find_elem_and_click: Find an page element and click it
find_elem_and_set: Find an page element and fill it with a new text.
find_element_and_download: Clicks a page element to initiate a download.
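The three Core Share methods added above (`stop_share`, `get_group_shares`, `revoke_group_share`) all repeat the same retry-on-401 idiom: issue the request and, on the first 401, re-authenticate and retry exactly once before giving up. A minimal, generic sketch of that idiom (the `authenticate` and `get_headers` callables are hypothetical stand-ins for the class methods used in the diff):

```python
import requests

REQUEST_TIMEOUT = 60

def request_with_reauth(url: str, authenticate, get_headers) -> dict | None:
    """GET a URL, re-authenticating once if the session has expired (401)."""
    retries = 0
    while True:
        response = requests.get(url, headers=get_headers(), timeout=REQUEST_TIMEOUT)
        if response.ok:
            return response.json()
        if response.status_code == 401 and retries == 0:
            authenticate(revalidate=True)  # refresh the expired access token
            retries += 1
        else:
            return None
```

Factoring the idiom out like this would avoid repeating the loop in every method; the diff keeps it inline, which has the advantage that each method controls its own error logging.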
@@ -31,12 +32,17 @@ from selenium.webdriver.chrome.options import Options from selenium import webdriver from selenium.webdriver.common.by import By + from selenium.webdriver.common.action_chains import ActionChains + from selenium.webdriver.remote.webelement import WebElement from selenium.common.exceptions import ( WebDriverException, NoSuchElementException, ElementNotInteractableException, ElementClickInterceptedException, + TimeoutException, + MoveTargetOutOfBoundsException, ) + except ModuleNotFoundError as module_exception: logger.warning("Module selenium is not installed") @@ -48,11 +54,14 @@ class By: ID: str = "" + class WebElement: + """Dummy class to avoid errors if selenium module cannot be imported""" + try: import chromedriver_autoinstaller except ModuleNotFoundError as module_exception: - logger.warning("Module chromedriver_autoinstaller is not installed") + logger.warning("Module chromedriver_autoinstaller is not installed!") class BrowserAutomation: @@ -60,9 +69,9 @@ class BrowserAutomation: def __init__( self, - base_url: str, - user_name: str, - user_password: str, + base_url: str = "", + user_name: str = "", + user_password: str = "", download_directory: str = "/tmp", take_screenshots: bool = False, automation_name: str = "screen", @@ -81,7 +90,7 @@ def __init__( automation_name ) - if self.take_screenshots: + if self.take_screenshots and not os.path.exists(self.screenshot_directory): os.makedirs(self.screenshot_directory) chromedriver_autoinstaller.install() self.browser = webdriver.Chrome(options=self.set_chrome_options()) @@ -128,7 +137,7 @@ def take_screenshot(self) -> bool: screenshot_file = "{}/{}-{}.png".format( self.screenshot_directory, self.screenshot_names, self.screen_counter ) - logger.info("Save browser screenshot to -> %s", screenshot_file) + logger.debug("Save browser screenshot to -> %s", screenshot_file) result = self.browser.get_screenshot_as_file(screenshot_file) self.screen_counter += 1 @@ -146,13 +155,13 @@ def get_page(self, url: str = "") -> bool: page_url = self.base_url + url try: - logger.info("Load page -> %s", page_url) + logger.debug("Load page -> %s", page_url) self.browser.get(page_url) except WebDriverException as exception: logger.error("Cannot load page -> %s; error -> %s", page_url, exception) return False - logger.info("Page title after get page -> %s", self.browser.title) + logger.debug("Page title after get page -> %s", self.browser.title) if self.take_screenshots: self.take_screenshot() @@ -161,14 +170,62 @@ def get_page(self, url: str = "") -> bool: # end method definition - def find_elem_and_click(self, find_elem: str, find_method: str = By.ID) -> bool: - """Find an page element and click it. + def get_title(self) -> str: + """Get the browser title. 
This is handy to validate that a certain page is loaded after get_page().
+
+ Returns:
+ str: Title of the browser window
+ """
+
+ if not self.browser:
+ logger.error("Browser not initialized!")
+ return None
+
+ return self.browser.title
+
+ # end method definition
+
+ def scroll_to_element(self, element: WebElement):
+ """Scroll an element into view to make it clickable.
+
+ Args:
+ element (WebElement): Web element that has been identified before
+ """
+
+ if not element:
+ logger.error("Undefined element!")
+ return
+
+ try:
+ actions = ActionChains(self.browser)
+ actions.move_to_element(element).perform()
+ except NoSuchElementException:
+ logger.error("Element not found in the DOM")
+ except TimeoutException:
+ logger.error("Timed out waiting for the element to be present or visible")
+ except ElementNotInteractableException:
+ logger.error("Element is not interactable!")
+ except MoveTargetOutOfBoundsException:
+ logger.error("Element is out of bounds!")
+ except WebDriverException as e:
+ logger.error("WebDriverException occurred -> %s", str(e))
+
+ # end method definition
+
+ def find_elem(
+ self,
+ find_elem: str,
+ find_method: str = By.ID,
+ show_error: bool = True,
+ ) -> WebElement:
+ """Find a page element.

Args:
find_elem (str): name of the page element
- find_method (str): either By.ID, By.NAME, By.CLASS_NAME, BY.XPATH
+ find_method (str, optional): either By.ID, By.NAME, By.CLASS_NAME, By.XPATH
+ show_error (bool, optional): show an error if the element is not found or not clickable

Returns:
- bool: True if successful, False otherwise
+ WebElement: web element or None in case an error occurred.
"""

# We don't want to expose class "By" outside this module,
@@ -183,30 +240,99 @@ def find_elem_and_click(self, find_elem: str, find_method: str = By.ID) -> bool:
find_method = By.XPATH
else:
logger.error("Unsupported find method!")
- return False
+ return None

try:
elem = self.browser.find_element(by=find_method, value=find_elem)
except NoSuchElementException as exception:
+ if show_error:
+ logger.error(
+ "Cannot find page element -> %s by -> %s; error -> %s",
+ find_elem,
+ find_method,
+ exception,
+ )
+ return None
+ else:
+ logger.warning(
+ "Cannot find page element -> %s by -> %s",
+ find_elem,
+ find_method,
+ )
+ return None
+ except TimeoutException as exception:
logger.error(
- "Cannot find page element -> %s by -> %s; error -> %s",
- find_elem,
- find_method,
+ "Timed out waiting for the element to be present or visible; error -> %s",
exception,
)
+ return None
+ except ElementNotInteractableException as exception:
+ logger.error("Element is not interactable; error -> %s", exception)
+ return None
+ except MoveTargetOutOfBoundsException:
+ logger.error("Element is out of bounds!")
+ return None
+ except WebDriverException as e:
+ logger.error("WebDriverException occurred -> %s", str(e))
+ return None
+
+ logger.debug("Found page element -> %s by -> %s", find_elem, find_method)
+
+ return elem
+
+ # end method definition
+
+ def find_elem_and_click(
+ self,
+ find_elem: str,
+ find_method: str = By.ID,
+ scroll_to_element: bool = True,
+ show_error: bool = True,
+ ) -> bool:
+ """Find a page element and click it.
+
+ Args:
+ find_elem (str): name of the page element
+ find_method (str, optional): either By.ID, By.NAME, By.CLASS_NAME, By.XPATH
+ scroll_to_element (bool, optional): scroll the element into view
+ show_error (bool, optional): show an error if the element is not found or not clickable
+ Returns:
+ bool: True if successful, False otherwise
+ """
+
+ if not find_elem:
+ if show_error:
+ logger.error("Missing element name! Cannot find HTML element!")
+ else:
+ logger.warning("Missing element name! Cannot find HTML element!")
return False
- logger.info("Found element -> %s by -> %s", find_elem, find_method)
+ elem = self.find_elem(
+ find_elem=find_elem, find_method=find_method, show_error=show_error
+ )
+
+ if not elem:
+ return not show_error

try:
- elem.click()
- except ElementClickInterceptedException as exception:
- logger.error(
- "Cannot click page element -> %s; error -> %s", find_elem, exception
- )
- return False
+ if scroll_to_element:
+ self.scroll_to_element(elem)

- logger.info("Successfully clicked element -> %s", find_elem)
+ elem.click()
+ except (
+ ElementClickInterceptedException,
+ ElementNotInteractableException,
+ ) as exception:
+ if show_error:
+ logger.error(
+ "Cannot click page element -> %s; error -> %s", find_elem, exception
+ )
+ return False
+ else:
+ logger.warning("Cannot click page element -> %s", find_elem)
+ return True
+
+ logger.debug("Successfully clicked element -> %s", find_elem)

if self.take_screenshots:
self.take_screenshot()
@@ -227,42 +353,23 @@ def find_elem_and_set(
Args:
find_elem (str): name of the page element
elem_value (str): new text string for the page element
- find_method (str): either By.ID, By.NAME, By.CLASS_NAME, or By.XPATH
+ find_method (str, optional): either By.ID, By.NAME, By.CLASS_NAME, or By.XPATH
+ is_sensitive (bool, optional): True for suppressing sensitive information in logging

Returns:
bool: True if successful, False otherwise
"""

- # We don't want to expose class "By" outside this module,
- # so we map the string values to the By class values:
- if find_method == "id":
- find_method = By.ID
- elif find_method == "name":
- find_method = By.NAME
- elif find_method == "class_name":
- find_method = By.CLASS_NAME
- elif find_method == "xpath":
- find_method = By.XPATH
- else:
- logger.error("Unsupported find method!")
- return False
-
- logger.info("Try to find element -> %s by -> %s...", find_elem, find_method)
+ elem = self.find_elem(
+ find_elem=find_elem, find_method=find_method, show_error=True
+ )

- try:
- elem = self.browser.find_element(find_method, find_elem)
- except NoSuchElementException as exception:
- logger.error(
- "Cannot find page element -> %s by -> %s; error -> %s",
- find_elem,
- find_method,
- exception,
- )
+ if not elem:
return False

if not is_sensitive:
- logger.info("Set element -> %s to value -> %s...", find_elem, elem_value)
+ logger.debug("Set element -> %s to value -> %s...", find_elem, elem_value)
else:
- logger.info("Set element -> %s to value -> ...", find_elem)
+ logger.debug("Set element -> %s to value -> ...", find_elem)

try:
elem.clear() # clear existing text in the input field
@@ -322,13 +429,16 @@ def run_login(
user_field: str = "otds_username",
password_field: str = "otds_password",
login_button: str = "loginbutton",
+ page: str = "",
) -> bool:
"""Login to target system via the browser"""

self.logged_in = False

if (
- not self.get_page() # assuming the base URL leads towards the login page
+ not self.get_page(
+ url=page
+ ) # assuming the base URL leads towards the login page
or
not self.find_elem_and_set(
find_elem=user_field, elem_value=self.user_name
)
@@ -346,7 +456,7 @@ def run_login(
)
return False

- logger.info("Page title after login -> %s", self.browser.title)
+ logger.debug("Page title after login -> %s", self.browser.title)

# Some special handling for Salesforce login:
if "Verify" in self.browser.title:
@@ -375,7 +485,7 @@ def implicit_wait(self, wait_time: float):
wait_time (float): time in seconds to wait
"""

- logger.info("Implicit wait for max -> %s seconds...", str(wait_time))
+ logger.debug("Implicit wait for max -> %s seconds...", str(wait_time))
self.browser.implicitly_wait(wait_time)

def end_session(self):
diff --git a/pyxecm/customizer/customizer.py b/pyxecm/customizer/customizer.py
index f5b5650..ded8dd3 100644
--- a/pyxecm/customizer/customizer.py
+++ b/pyxecm/customizer/customizer.py
@@ -59,7 +59,7 @@
# OpenText specific modules:
import yaml

-from pyxecm import OTAC, OTCS, OTDS, OTIV, OTPD
+from pyxecm import OTAC, OTCS, OTDS, OTIV, OTPD, OTMM, CoreShare
from pyxecm.customizer.k8s import K8s
from pyxecm.customizer.m365 import M365
from pyxecm.customizer.payload import Payload
@@ -74,7 +74,7 @@ class CustomizerSettings:
"""Class to manage settings"""

placeholder_values: dict = field(default_factory=dict)
- stop_on_error: bool = os.environ.get("LOGLEVEL", "INFO") == "DEBUG"
+ stop_on_error: bool = os.environ.get("STOP_ON_ERROR", "false").lower() == "true"
cust_log_file: str = "/tmp/customizing.log"

customizer_start_time = customizer_end_time = datetime.now()
@@ -123,6 +123,7 @@ class CustomizerSettingsOTCS:
port: int = os.environ.get("OTCS_SERVICE_PORT_OTCS", 8080)
port_backend: int = os.environ.get("OTCS_SERVICE_PORT_OTCS", 8080)
port_frontend: int = 80
+ base_path: str = "/cs/cs"
admin: str = os.environ.get("OTCS_ADMIN", "admin")
password: str = os.environ.get("OTCS_PASSWORD")
partition: str = os.environ.get("OTCS_PARTITION", "Content Server Members")
@@ -142,7 +143,11 @@ class CustomizerSettingsOTCS:
replicas_frontend = 0
replicas_backend = 0

+ # Add configuration options for Customizer behaviour
update_admin_user: bool = True
+ upload_config_files: bool = True
+ upload_status_files: bool = True
+ upload_log_file: bool = True

@dataclass
@@ -231,7 +236,23 @@ class CustomizerSettingsM365:
password: str = os.environ.get("O365_PASSWORD", "")
domain: str = os.environ.get("O365_DOMAIN", "")
sku_id: str = os.environ.get("O365_SKU_ID", "c7df2760-2c81-4ef7-b578-5b5392b571df")
- teams_app_name: str = "OpenText Extended ECM"
+ teams_app_name: str = os.environ.get("O365_TEAMS_APP_NAME", "OpenText Extended ECM")
+ teams_app_external_id: str = os.environ.get(
+ "O365_TEAMS_APP_ID", "dd4af790-d8ff-47a0-87ad-486318272c7a"
+ )
+
+
+@dataclass
+class CustomizerSettingsCoreShare:
+ """Class for Core Share related settings"""
+
+ enabled: bool = os.environ.get("CORE_SHARE_ENABLED", "false").lower() == "true"
+ base_url: str = os.environ.get("CORE_SHARE_BASE_URL", "https://core.opentext.com")
+ sso_url: str = os.environ.get("CORE_SHARE_SSO_URL", "https://sso.core.opentext.com")
+ client_id: str = os.environ.get("CORE_SHARE_CLIENT_ID", "")
+ client_secret: str = os.environ.get("CORE_SHARE_CLIENT_SECRET", "")
+ username: str = os.environ.get("CORE_SHARE_USERNAME", "")
+ password: str = os.environ.get("CORE_SHARE_PASSWORD", "")

@dataclass
@@ -258,6 +279,7 @@ def __init__(
k8s: CustomizerSettingsK8S = CustomizerSettingsK8S(),
otawp: CustomizerSettingsOTAWP = CustomizerSettingsOTAWP(),
m365: CustomizerSettingsM365 = CustomizerSettingsM365(),
+ core_share:
CustomizerSettingsCoreShare = CustomizerSettingsCoreShare(),
+ aviator: CustomizerSettingsAviator = CustomizerSettingsAviator(),
):
self.settings = settings
@@ -286,6 +308,9 @@ def __init__(
# Microsoft 365 Environment variables:
self.m365_settings = m365

+ # Core Share Environment variables:
+ self.core_share_settings = core_share
+
# Aviator variables:
self.aviator_settings = aviator

@@ -299,15 +324,18 @@ def __init__(
self.otiv_object: OTIV | None = None
self.k8s_object: K8s | None = None
self.m365_object: M365 | None = None
+ self.core_share_object: CoreShare | None = None
self.browser_automation_object: BrowserAutomation | None = None

- def log_header(self, text: str, char: str = "=", length: int = 60):
+ # end initializer
+
+ def log_header(self, text: str, char: str = "=", length: int = 80):
"""Helper method to output a section header in the log file

Args:
- text (str): _description_
+ text (str): Headline text to output into the log file.
char (str, optional): header line character. Defaults to "=".
- length (int, optional): maxium length. Defaults to 60.
+ length (int, optional): maximum length. Defaults to 80.
Returns:
None
"""
@@ -329,7 +357,7 @@ def log_header(self, text: str, char: str = "=", length: int = 60):
"%s %s %s", char * char_count, text, char * (char_count + extra_char)
)

- # end function definition
+ # end method definition

def init_m365(self) -> M365:
"""Initialize the M365 object we use to talk to the Microsoft Graph API.
@@ -373,9 +401,13 @@ def init_m365(self) -> M365:
"Microsoft 365 Default License SKU = %s", self.m365_settings.sku_id
)
logger.info(
- "Microsoft 365 Teams App = %s",
+ "Microsoft 365 Teams App Name = %s",
self.m365_settings.teams_app_name,
)
+ logger.info(
+ "Microsoft 365 Teams App External ID = %s",
+ self.m365_settings.teams_app_external_id,
+ )

m365_object = M365(
tenant_id=self.m365_settings.tenant_id,
@@ -384,16 +416,265 @@ def init_m365(self) -> M365:
domain=self.m365_settings.domain,
sku_id=self.m365_settings.sku_id,
teams_app_name=self.m365_settings.teams_app_name,
+ teams_app_external_id=self.m365_settings.teams_app_external_id,
)

if m365_object and m365_object.authenticate():
logger.info("Connected to Microsoft Graph API.")
- return m365_object
else:
logger.error("Failed to connect to Microsoft Graph API.")
return m365_object

- # end function definition
+ logger.info(
+ "Download M365 Teams App -> '%s' (external ID = %s) from Extended ECM (OTCS)...",
+ self.m365_settings.teams_app_name,
+ self.m365_settings.teams_app_external_id,
+ )
+
+ # Download MS Teams App from OTCS (this has a nasty side-effect with 23.2
+ # of unsetting 2 checkboxes on that config page - we reset these checkboxes
+ # with the settings file "O365Settings.xml"):
+ response = self.otcs_frontend_object.download_config_file(
+ "/cs/cs?func=officegroups.DownloadTeamsPackage",
+ "/tmp/ot.xecm.teams.zip",
+ )
+ # this app upload will be done with the user credentials - this is required:
+ m365_object.authenticate_user(
+ self.m365_settings.user, self.m365_settings.password
+ )
+
+ # Check if the app is already installed in the apps catalog;
+ # ideally we want to use the external app ID for this lookup:
+ app_exist = False
+
+ # If the App External ID is provided via Env variable then we
+ # prefer to use it instead of the App name:
+ if self.m365_settings.teams_app_external_id:
+ logger.info(
+ "Check if M365 Teams App -> '%s' (%s) is already installed in catalog using external app ID...",
+ self.m365_settings.teams_app_name,
+ self.m365_settings.teams_app_external_id,
+ )
+ response =
m365_object.get_teams_apps( + filter_expression="externalId eq '{}'".format( + self.m365_settings.teams_app_external_id + ) + ) + # this should always be True as ID is unique: + app_exist = m365_object.exist_result_item( + response=response, + key="externalId", + value=self.m365_settings.teams_app_external_id, + ) + # If the app could not be found via the external ID we fall back to + # search for the app by name: + if not app_exist: + if self.m365_settings.teams_app_external_id: + logger.info( + "Could not find M365 Teams App using the external ID -> %s. Try to lookup the app by name -> '%s' instead...", + self.m365_settings.teams_app_external_id, + self.m365_settings.teams_app_name, + ) + logger.info( + "Check if M365 Teams App -> '%s' is already installed in catalog (using app name)...", + self.m365_settings.teams_app_name, + ) + response = m365_object.get_teams_apps( + filter_expression="contains(displayName, '{}')".format( + self.m365_settings.teams_app_name + ) + ) + app_exist = m365_object.exist_result_item( + response=response, + key="displayName", + value=self.m365_settings.teams_app_name, + ) + if app_exist: + # We double check that we have the effective name of the app + # in the catalog to avoid errors when the app is looked up + # by its wrong name in the customizer automation. This can + # happen if the app is installed manually or the environment + # variable is set to a wrong name. + app_catalog_name = m365_object.get_result_value(response, "displayName") + if app_catalog_name != self.m365_settings.teams_app_name: + logger.warning( + "The Extended ECM app name -> '%s' in the M365 Teams catalog does not match the defined app name '%s'! Somebody must have manually installed the app with the wrong name!", + app_catalog_name, + self.m365_settings.teams_app_name, + ) + # Align the name in the settings dict with the existing name in the catalog. + self.m365_settings.teams_app_name = app_catalog_name + # Align the name in the M365 object config dict with the existing name in the catalog. + m365_object.config()["teamsAppName"] = app_catalog_name + app_internal_id = m365_object.get_result_value( + response=response, key="id", index=0 + ) # 0 = Index = first item + # Store the internal ID for later use + m365_object.config()["teamsAppInternalId"] = app_internal_id + app_catalog_version = m365_object.get_result_value( + response=response, + key="version", + index=0, + sub_dict_name="appDefinitions", + ) + logger.info( + "M365 Teams App -> '%s' (external ID = %s) is already in app catalog with app internal ID -> %s and version -> %s. Check if we have a newer version to upload...", + self.m365_settings.teams_app_name, + self.m365_settings.teams_app_external_id, + app_internal_id, + app_catalog_version, + ) + app_download_version = m365_object.extract_version_from_app_manifest( + app_path="/tmp/ot.xecm.teams.zip" + ) + if app_catalog_version < app_download_version: + logger.info( + "Upgrading Extended ECM Teams App in catalog from version -> %s to version -> %s...", + app_catalog_version, + app_download_version, + ) + response = m365_object.upload_teams_app( + app_path="/tmp/ot.xecm.teams.zip", + update_existing_app=True, + app_catalog_id=app_internal_id, + ) + app_internal_id = m365_object.get_result_value( + response=response, + key="teamsAppId", + ) + if app_internal_id: + logger.info( + "Successfully upgraded Extended ECM Teams App -> %s (external ID = %s). 
Internal App ID -> %s",
+ self.m365_settings.teams_app_name,
+ self.m365_settings.teams_app_external_id,
+ app_internal_id,
+ )
+ # Store the internal ID for later use
+ m365_object.config()["teamsAppInternalId"] = app_internal_id
+ else:
+ logger.error(
+ "Failed to upgrade Extended ECM Teams App -> %s (external ID = %s).",
+ self.m365_settings.teams_app_name,
+ self.m365_settings.teams_app_external_id,
+ )
+ else:
+ logger.info(
+ "No upgrade required. The downloaded version -> %s is not newer than the version -> %s which is already in the M365 app catalog.",
+ app_download_version,
+ app_catalog_version,
+ )
+ else: # Extended ECM M365 Teams app is not yet installed...
+ logger.info(
+ "Extended ECM Teams App -> '%s' (external ID = %s) is not yet in app catalog. Installing as new app...",
+ self.m365_settings.teams_app_name,
+ self.m365_settings.teams_app_external_id,
+ )
+ response = m365_object.upload_teams_app(
+ app_path="/tmp/ot.xecm.teams.zip", update_existing_app=False
+ )
+ app_internal_id = m365_object.get_result_value(
+ response=response,
+ key="id", # for new installs it is NOT "teamsAppId" but "id" as we use a different M365 Graph API endpoint !!!
+ )
+ if app_internal_id:
+ logger.info(
+ "Successfully installed Extended ECM Teams App -> '%s' (external ID = %s). Internal App ID -> %s",
+ self.m365_settings.teams_app_name,
+ self.m365_settings.teams_app_external_id,
+ app_internal_id,
+ )
+ # Store the internal ID for later use
+ m365_object.config()["teamsAppInternalId"] = app_internal_id
+ else:
+ logger.error(
+ "Failed to install Extended ECM Teams App -> '%s' (external ID = %s).",
+ self.m365_settings.teams_app_name,
+ self.m365_settings.teams_app_external_id,
+ )
+
+ # logger.info("======== Upload Outlook Add-In ============")
+
+ # # Download MS Outlook Add-In from OTCS:
+ # MANIFEST_FILE = "/tmp/BusinessWorkspace.Manifest.xml"
+ # if not self.otcs_frontend_object.download_config_file(
+ # "/cs/cs?func=outlookaddin.DownloadManifest",
+ # MANIFEST_FILE,
+ # "DeployedContentServer",
+ # self.otcs_settings.public_url,
+ # ):
+ # logger.error("Failed to download M365 Outlook Add-In from Extended ECM!")
+ # else:
+ # # THIS IS NOT IMPLEMENTED DUE TO LACK OF M365 GRAPH API SUPPORT!
+ # # Do it manually for now: https://admin.microsoft.com/#/Settings/IntegratedApps
+ # logger.info("Successfully downloaded M365 Outlook Add-In from Extended ECM to %s", MANIFEST_FILE)
+ # m365_object.upload_outlook_app(MANIFEST_FILE)
+
+ return m365_object
+
+ # end method definition
+
+ def init_coreshare(self) -> CoreShare:
+ """Initialize the Core Share object we use to talk to the Core Share API.
+
+ Args:
+ None
+ Returns:
+ CoreShare: Core Share object or None if the object couldn't be created or
+ the authentication fails.
+ """
+
+ logger.info(
+ "Core Share Base URL = %s", self.core_share_settings.base_url
+ )
+ logger.info(
+ "Core Share SSO URL = %s", self.core_share_settings.sso_url
+ )
+ logger.info(
+ "Core Share Client ID = %s", self.core_share_settings.client_id
+ )
+ logger.debug(
+ "Core Share Client Secret = %s",
+ self.core_share_settings.client_secret,
+ )
+ logger.info(
+ "Core Share User = %s",
+ (
+ self.core_share_settings.username
+ if self.core_share_settings.username != ""
+ else "<not set>"
+ ),
+ )
+ logger.debug(
+ "Core Share Password = %s",
+ (
+ self.core_share_settings.password
+ if self.core_share_settings.password != ""
+ else "<not set>"
+ ),
+ )
+
+ core_share_object = CoreShare(
+ base_url=self.core_share_settings.base_url,
+ sso_url=self.core_share_settings.sso_url,
+ client_id=self.core_share_settings.client_id,
+ client_secret=self.core_share_settings.client_secret,
+ username=self.core_share_settings.username,
+ password=self.core_share_settings.password,
+ )
+
+ if core_share_object and core_share_object.authenticate_admin():
+ logger.info("Connected to Core Share as Tenant Admin.")
+ else:
+ logger.error("Failed to connect to Core Share as Tenant Admin.")
+
+ if core_share_object and core_share_object.authenticate_user():
+ logger.info("Connected to Core Share as Tenant Service User.")
+ else:
+ logger.error("Failed to connect to Core Share as Tenant Service User.")
+
+ return core_share_object
+
+ # end method definition

def init_k8s(self) -> K8s:
"""Initialize the Kubernetes object we use to talk to the Kubernetes API.
@@ -407,10 +688,10 @@ def init_k8s(self) -> K8s:
"""

logger.info("Connection parameters Kubernetes (K8s):")
- logger.info("K8s inCluster -> %s", self.k8s_settings.in_cluster)
- logger.info("K8s namespace -> %s", self.k8s_settings.namespace)
+ logger.info("K8s inCluster = %s", self.k8s_settings.in_cluster)
+ logger.info("K8s namespace = %s", self.k8s_settings.namespace)
logger.info(
- "K8s kubeconfig file -> %s",
+ "K8s kubeconfig file = %s",
self.k8s_settings.kubeconfig_file,
)
@@ -430,14 +711,14 @@ def init_k8s(self) -> K8s:
)
if not otcs_frontend_scale:
logger.error(
- "Cannot find Kubernetes Stateful Set for OTCS Frontends -> %s",
+ "Cannot find Kubernetes Stateful Set -> '%s' for OTCS Frontends!",
self.otcs_settings.k8s_statefulset_frontend,
)
sys.exit()

self.otcs_settings.replicas_frontend = otcs_frontend_scale.spec.replicas # type: ignore
logger.info(
- "Stateful Set -> %s has -> %s replicas",
+ "Stateful Set -> '%s' has -> %s replicas",
self.otcs_settings.k8s_statefulset_frontend,
self.otcs_settings.replicas_frontend,
)
@@ -448,21 +729,21 @@ def init_k8s(self) -> K8s:
)
if not otcs_backend_scale:
logger.error(
- "Cannot find Kubernetes Stateful Set for OTCS Backends -> %s",
+ "Cannot find Kubernetes Stateful Set -> '%s' for OTCS Backends!",
self.otcs_settings.k8s_statefulset_backend,
)
sys.exit()

self.otcs_settings.replicas_backend = otcs_backend_scale.spec.replicas # type: ignore
logger.info(
- "Stateful Set -> %s has -> %s replicas",
+ "Stateful Set -> '%s' has -> %s replicas",
self.otcs_settings.k8s_statefulset_backend,
self.otcs_settings.replicas_backend,
)

return k8s_object

- # end function definition
+ # end method definition

def init_otds(self) -> OTDS:
"""Initialize the OTDS object and parameters and authenticate at OTDS once it is ready.
@@ -519,7 +800,7 @@ def init_otds(self) -> OTDS:

return otds_object

- # end function definition
+ # end method definition

def init_otac(self) -> OTAC:
"""Initialize the OTAC object and parameters.
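The `init_k8s` changes above read the desired replica counts for the OTCS frontend and backend stateful sets and abort if either stateful set is missing. The wrapper call that produces `otcs_frontend_scale` is not shown in the hunk; for reference, a sketch of what such a scale lookup looks like with the official `kubernetes` Python client, which the wrapper is assumed to rely on:

```python
from kubernetes import client, config

def get_statefulset_replicas(name: str, namespace: str = "default") -> int:
    """Read the desired replica count from a stateful set's scale subresource."""
    config.load_kube_config()  # use config.load_incluster_config() inside a pod
    apps = client.AppsV1Api()
    scale = apps.read_namespaced_stateful_set_scale(name=name, namespace=namespace)
    return scale.spec.replicas
```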
@@ -557,6 +838,16 @@ def init_otac(self) -> OTAC:
self.otds_settings.password,
)

+ # This is a work-around as OTCS container automation is not
+ # enabling the certificate reliably.
+ response = otac_object.enable_certificate(
+ cert_name="SP_otcs-admin-0", cert_type="ARC"
+ )
+ if not response:
+ logger.error("Failed to enable OTAC certificate for Extended ECM!")
+ else:
+ logger.info("Successfully enabled OTAC certificate for Extended ECM!")
+
# is there a known server configured for Archive Center (to sync content with)
if otac_object and self.otac_settings.known_server != "":
# wait until the OTAC pod is in ready state
@@ -587,7 +878,7 @@ def init_otac(self) -> OTAC:

return otac_object

- # end function definition
+ # end method definition

def init_otcs(
self,
@@ -646,6 +937,7 @@ def init_otcs(
partition_name,
resource_name,
otds_ticket=otds_ticket,
+ base_path=self.otcs_settings.base_path,
)

# It is important to wait for OTCS to be configured - otherwise we
@@ -666,17 +958,17 @@ def init_otcs(
otcs_cookie = otcs_object.authenticate()
logger.info("OTCS is ready now.")

- if self.otcs_settings.update_admin_user:
- # Set first name and last name of Admin user (ID = 1000):
- otcs_object.update_user(1000, field="first_name", value="Terrarium")
- otcs_object.update_user(1000, field="last_name", value="Admin")
+ # if self.otcs_settings.update_admin_user:
+ # Set first name and last name of Admin user (ID = 1000):
+ # otcs_object.update_user(1000, field="first_name", value="Terrarium")
+ # otcs_object.update_user(1000, field="last_name", value="Admin")

if "OTCS_RESSOURCE_ID" not in self.settings.placeholder_values:
- self.settings.placeholder_values[
- "OTCS_RESSOURCE_ID"
- ] = self.otds_object.get_resource(self.otcs_settings.resource_name)[
- "resourceID"
- ]
+ self.settings.placeholder_values["OTCS_RESSOURCE_ID"] = (
+ self.otds_object.get_resource(self.otcs_settings.resource_name)[
+ "resourceID"
+ ]
+ )
logger.debug(
"Placeholder values after OTCS init = %s",
self.settings.placeholder_values,
)
@@ -686,9 +978,9 @@ def init_otcs(
otcs_resource = self.otds_object.get_resource(
self.otcs_settings.resource_name
)
- otcs_resource[
- "logoutURL"
- ] = f"{self.otawp_settings.public_protocol}://{self.otawp_settings.public_url}/home/system/wcp/sso/sso_logout.htm"
+ otcs_resource["logoutURL"] = (
+ f"{self.otawp_settings.public_protocol}://{self.otawp_settings.public_url}/home/system/wcp/sso/sso_logout.htm"
+ )
otcs_resource["logoutMethod"] = "GET"

self.otds_object.update_resource(name="cs", resource=otcs_resource)
@@ -698,7 +990,7 @@ def init_otcs(

return otcs_object

- # end function definition
+ # end method definition

def init_otiv(self) -> OTIV | None:
"""Initialize the OTIV (Intelligent Viewing) object and its OTDS settings.
@@ -750,9 +1042,27 @@ def init_otiv(self) -> OTIV | None:
)
return None

+ # Workaround for VAT-4580 (24.2.0)
+ update_publisher = self.otds_object.update_user(
+ partition="Content Server Service Users",
+ user_id="iv-publisher",
+ attribute_name="oTType",
+ attribute_value="ServiceUser",
+ )
+ while update_publisher is None:
+ update_publisher = self.otds_object.update_user(
+ partition="Content Server Service Users",
+ user_id="iv-publisher",
+ attribute_name="oTType",
+ attribute_value="ServiceUser",
+ )
+ time.sleep(30)
+
+ logger.info("OTDS user iv-publisher -> updated oTType=ServiceUser")
+
return otiv_object

- # end function definition
+ # end method definition

def init_otpd(self) -> OTPD:
"""Initialize the OTPD (PowerDocs) object and parameters.
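The VAT-4580 workaround above polls `update_user` until it returns a non-None result, sleeping 30 seconds between attempts. A generic form of that poll-until-success pattern (names hypothetical; unlike the unbounded loop in the diff, this sketch caps the number of attempts so a permanently failing call cannot hang the customizer):

```python
import time

def retry_until_success(action, wait_seconds: int = 30, max_attempts: int = 20):
    """Call `action` repeatedly until it returns a non-None result."""
    for attempt in range(1, max_attempts + 1):
        result = action()
        if result is not None:
            return result
        time.sleep(wait_seconds)
    return None  # still failing after max_attempts
```

Usage would then look like `retry_until_success(lambda: otds.update_user(partition="Content Server Service Users", user_id="iv-publisher", attribute_name="oTType", attribute_value="ServiceUser"))`.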
@@ -829,7 +1139,7 @@ def init_otawp(self):
logger.info("OTAWP K8s Config Map = %s", self.otawp_settings.k8s_configmap)

logger.info(
- "Wait for OTCS to create its OTDS resource with name -> %s...",
+ "Wait for OTCS to create its OTDS resource with name -> '%s'...",
self.otcs_settings.resource_name,
)
@@ -838,7 +1148,7 @@ def init_otawp(self):
otcs_resource = self.otds_object.get_resource(self.otcs_settings.resource_name)
while otcs_resource is None:
logger.warning(
- "OTDS resource for Content Server with name -> %s does not exist yet. Waiting...",
+ "OTDS resource for Content Server with name -> '%s' does not exist yet. Waiting...",
self.otcs_settings.resource_name,
)
time.sleep(30)
@@ -1150,7 +1460,7 @@ def init_otawp(self):
)
while otcs_partition is None:
logger.warning(
- "OTDS user partition for Content Server with name -> %s does not exist yet. Waiting...",
+ "OTDS user partition for Content Server with name -> '%s' does not exist yet. Waiting...",
self.otcs_settings.partition,
)
@@ -1188,7 +1498,7 @@ def init_otawp(self):
# check if the license file exists, otherwise skip for versions pre 24.1
if os.path.isfile(self.otawp_settings.license_file):
logger.info(
- "OTAWP license file (%s) found, assiging to ressource %s",
+ "Found OTAWP license file -> '%s', assigning it to resource '%s'...",
self.otawp_settings.license_file,
self.otawp_settings.resource_name,
)
@@ -1201,14 +1511,14 @@ def init_otawp(self):
)
if not otawp_license:
logger.error(
- "Couldn't apply license -> %s for product -> %s to OTDS resource -> %s",
+ "Couldn't apply license -> '%s' for product -> '%s' to OTDS resource -> '%s'",
self.otawp_settings.license_file,
self.otawp_settings.product_name,
awp_resource["resourceID"],
)
else:
logger.info(
- "Successfully applied license -> %s for product -> %s to OTDS resource -> %s",
+ "Successfully applied license -> '%s' for product -> '%s' to OTDS resource -> '%s'",
self.otawp_settings.license_file,
self.otawp_settings.product_name,
awp_resource["resourceID"],
@@ -1237,20 +1547,20 @@ def init_otawp(self):
)
if not assigned_license:
logger.error(
- "Partition -> %s could not be assigned to license -> %s (%s)",
+ "Partition -> '%s' could not be assigned to license -> '%s' (%s)",
partition_name,
self.otawp_settings.product_name,
"USERS",
)
else:
logger.info(
- "Partition -> %s successfully assigned to license -> %s (%s)",
+ "Partition -> '%s' successfully assigned to license -> '%s' (%s)",
partition_name,
self.otawp_settings.product_name,
"USERS",
)

- # end function definition
+ # end method definition

def restart_otcs_service(self, otcs_object: OTCS, extra_wait_time: int = 60):
"""Restart the Content Server service in all OTCS pods
@@ -1273,11 +1583,11 @@ def restart_otcs_service(self, otcs_object: OTCS, extra_wait_time: int = 60):
for x in range(0, self.otcs_settings.replicas_frontend):
pod_name = self.otcs_settings.k8s_statefulset_frontend + "-" + str(x)

- logger.info("Deactivate Liveness probe for pod -> %s", pod_name)
+ logger.info("Deactivate Liveness probe for pod -> '%s'", pod_name)
self.k8s_object.exec_pod_command(
pod_name, ["/bin/sh", "-c", "touch /tmp/keepalive"]
)
- logger.info("Restarting pod -> %s", pod_name)
+ logger.info("Restarting pod -> '%s'", pod_name)
self.k8s_object.exec_pod_command(
pod_name, ["/bin/sh", "-c", "/opt/opentext/cs/stop_csserver"]
)
@@ -1289,11 +1599,11 @@ def restart_otcs_service(self, otcs_object: OTCS, extra_wait_time: int = 60):
for x in range(0, self.otcs_settings.replicas_backend):
pod_name = self.otcs_settings.k8s_statefulset_backend +
"-" + str(x) - logger.info("Deactivate Liveness probe for pod -> %s", pod_name) + logger.info("Deactivate Liveness probe for pod -> '%s'", pod_name) self.k8s_object.exec_pod_command( pod_name, ["/bin/sh", "-c", "touch /tmp/keepalive"] ) - logger.info("Restarting pod -> %s", pod_name) + logger.info("Restarting pod -> '%s'", pod_name) self.k8s_object.exec_pod_command( pod_name, ["/bin/sh", "-c", "/opt/opentext/cs/stop_csserver"] ) @@ -1313,7 +1623,7 @@ def restart_otcs_service(self, otcs_object: OTCS, extra_wait_time: int = 60): for x in range(0, self.otcs_settings.replicas_frontend): pod_name = self.otcs_settings.k8s_statefulset_frontend + "-" + str(x) - logger.info("Reactivate Liveness probe for pod -> %s", pod_name) + logger.info("Reactivate Liveness probe for pod -> '%s'", pod_name) self.k8s_object.exec_pod_command( pod_name, ["/bin/sh", "-c", "rm /tmp/keepalive"] ) @@ -1321,7 +1631,7 @@ def restart_otcs_service(self, otcs_object: OTCS, extra_wait_time: int = 60): for x in range(0, self.otcs_settings.replicas_backend): pod_name = self.otcs_settings.k8s_statefulset_backend + "-" + str(x) - logger.info("Reactivate Liveness probe for pod -> %s", pod_name) + logger.info("Reactivate Liveness probe for pod -> '%s'", pod_name) self.k8s_object.exec_pod_command( pod_name, ["/bin/sh", "-c", "rm /tmp/keepalive"] ) @@ -1337,7 +1647,7 @@ def restart_otcs_service(self, otcs_object: OTCS, extra_wait_time: int = 60): time.sleep(extra_wait_time) logger.info("Continue customizing...") - # end function definition + # end method definition def restart_otac_service(self) -> bool: """Restart the Archive Center spawner service in OTAC pod @@ -1352,7 +1662,7 @@ def restart_otac_service(self) -> bool: return False logger.info( - "Restarting spawner service in Archive Center pod -> %s", + "Restarting spawner service in Archive Center pod -> '%s'", self.otac_settings.k8s_pod_name, ) # The Archive Center Spawner needs to be run in "interactive" mode - otherwise the command will "hang": @@ -1370,7 +1680,7 @@ def restart_otac_service(self) -> bool: else: return False - # end function definition + # end method definition def restart_otawp_pod(self): """Delete the AppWorks Platform Pod to make Kubernetes restart it. @@ -1382,7 +1692,7 @@ def restart_otawp_pod(self): self.k8s_object.delete_pod(self.otawp_settings.k8s_statefulset + "-0") - # end function definition + # end method definition def consolidate_otds(self): """Consolidate OTDS resources @@ -1395,7 +1705,7 @@ def consolidate_otds(self): if self.otawp_settings.enabled: # is AppWorks Platform deployed? 
self.otds_object.consolidate(self.otawp_settings.resource_name) - # end function definition + # end method definition def import_powerdocs_configuration(self, otpd_object: OTPD): """Import a database export (zip file) into the PowerDocs database @@ -1408,7 +1718,7 @@ def import_powerdocs_configuration(self, otpd_object: OTPD): # Download file from remote location specified by the OTPD_DBIMPORTFILE # this must be a public place without authentication: logger.info( - "Download PowerDocs database file from URL -> %s", + "Download PowerDocs database file from URL -> '%s'", self.otpd_settings.db_importfile, ) @@ -1416,7 +1726,7 @@ def import_powerdocs_configuration(self, otpd_object: OTPD): package = requests.get(self.otpd_settings.db_importfile, timeout=60) package.raise_for_status() logger.info( - "Successfully downloaded PowerDocs database file -> %s; status code -> %s", + "Successfully downloaded PowerDocs database file -> '%s'; status code -> %s", self.otpd_settings.db_importfile, package.status_code, ) @@ -1437,7 +1747,7 @@ def import_powerdocs_configuration(self, otpd_object: OTPD): except requests.exceptions.HTTPError as err: logger.error("Request error -> %s", err) - # end function definition + # end method definition def set_maintenance_mode(self, enable: bool = True): """Enable or Disable Maintenance Mode @@ -1475,12 +1785,14 @@ def set_maintenance_mode(self, enable: bool = True): ) logger.info("OTCS frontend is now back in Production Mode!") + # end method definition + def customization_run(self): """Central function to initiate the customization""" # Set Timer for duration calculation - self.settings.customizer_start_time = ( - self.settings.customizer_end_time - ) = datetime.now() + self.settings.customizer_start_time = self.settings.customizer_end_time = ( + datetime.now() + ) # Initialize the OTDS, OTCS and OTPD objects and wait for the # pods to be ready. If any of this fails we bail out: @@ -1563,119 +1875,40 @@ def customization_run(self): else: self.otpd_object = None + if self.core_share_settings.enabled: # is Core Share enabled? + self.log_header("Initialize Core Share") + + self.core_share_object = self.init_coreshare() + if not self.core_share_object: + logger.error("Failed to initialize Core Share - exiting...") + sys.exit() + else: + self.core_share_object = None + if ( self.m365_settings.enabled and self.m365_settings.user != "" and self.m365_settings.password != "" ): # is M365 enabled? 
- self.log_header("Initialize MS Graph API") + self.log_header("Initialize Microsoft 365") # Initialize the M365 object and connection to M365 Graph API: self.m365_object = self.init_m365() - - self.log_header("Upload MS Teams App") - - # Download MS Teams App from OTCS (this has with 23.2 a nasty side-effect - # of unsetting 2 checkboxes on that config page - we reset these checkboxes - # with the settings file "O365Settings.xml"): - response = self.otcs_frontend_object.download_config_file( - "/cs/cs?func=officegroups.DownloadTeamsPackage", - "/tmp/ot.xecm.teams.zip", - ) - # this app upload will be done with the user credentials - this is required: - self.m365_object.authenticate_user( - self.m365_settings.user, self.m365_settings.password - ) - - # Check if the app is already installed in the apps catalog: - response = self.m365_object.get_teams_apps( - f"contains(displayName, '{self.m365_settings.teams_app_name}')" - ) - if self.m365_object.exist_result_item( - response, "displayName", self.m365_settings.teams_app_name - ): - app_catalog_id = self.m365_object.get_result_value( - response=response, key="id", index=0 - ) # 0 = Index = first item - app_catalog_version = self.m365_object.get_result_value( - response=response, - key="version", - index=0, - sub_dict_name="appDefinitions", - ) - logger.info( - "Extended ECM Teams App is already in app catalog with app catalog ID -> %s and version -> %s. Check if we have a newer version to upload...", - app_catalog_id, - app_catalog_version, - ) - app_upload_version = self.m365_object.extract_version_from_app_manifest( - app_path="/tmp/ot.xecm.teams.zip" - ) - if app_catalog_version < app_upload_version: - logger.info( - "Upgrading Extended ECM Teams App in catalog from version -> %s to version -> %s...", - app_catalog_version, - app_upload_version, - ) - response = self.m365_object.upload_teams_app( - app_path="/tmp/ot.xecm.teams.zip", - update_existing_app=True, - app_catalog_id=app_catalog_id, - ) - else: - logger.info( - "No upgrade required. The upload version -> %s is not newer than the version -> %s which is in the M365 app catalog.", - app_upload_version, - app_catalog_version, - ) - else: - logger.info( - "Extended Teams ECM App is not yet in app catalog. Installing as new app..." - ) - response = self.m365_object.upload_teams_app( - app_path="/tmp/ot.xecm.teams.zip" - ) - - # logger.info("======== Upload Outlook Add-In ============") - - # # Download MS Outlook Add-In from OTCS: - # MANIFEST_FILE = "/tmp/BusinessWorkspace.Manifest.xml" - # if not self.otcs_frontend_object.download_config_file( - # "/cs/cs?func=outlookaddin.DownloadManifest", - # MANIFEST_FILE, - # "DeployedContentServer", - # self.otcs_settings.public_url, - # ): - # logger.error("Failed to download M365 Outlook Add-In from Extended ECM!") - # else: - # # THIS IS NOT IMPLEMENTED DUE TO LACK OF M365 GRAPH API SUPPORT! 
- # # Do it manually for now: https://admin.microsoft.com/#/Settings/IntegratedApps - # logger.info("Successfully downloaded M365 Outlook Add-In from Extended ECM to %s", MANIFEST_FILE) - # self.m365_object.upload_outlook_app(MANIFEST_FILE) - else: - self.m365_object = None - - # self.log_header("Initialize Browser Automation...") - - # We initialize a Selenium based browser automation for - # those die-hard settings that cannot be automated via REST API - # nor LLConfig nor Transport: - # self.browser_automation_object = self.init_browser_automation() - # if not self.browser_automation_object: - # logger.error("Failed to initialize Browser Automation - exiting...") - # sys.exit() + if not self.m365_object: + logger.error("Failed to initialize Microsoft 365!") + sys.exit() self.log_header("Processing Payload") cust_payload_list = [] # Is uncompressed payload provided? if os.path.exists(self.settings.cust_payload): - logger.info("Found payload file -> %s", self.settings.cust_payload) + logger.info("Found payload file -> '%s'", self.settings.cust_payload) cust_payload_list.append(self.settings.cust_payload) # Is compressed payload provided? if os.path.exists(self.settings.cust_payload_gz): logger.info( - "Found compressed payload file -> %s", self.settings.cust_payload_gz + "Found compressed payload file -> '%s'", self.settings.cust_payload_gz ) cust_payload_list.append(self.settings.cust_payload_gz) @@ -1683,16 +1916,16 @@ def customization_run(self): if os.path.exists(self.settings.cust_payload_external): for filename in os.scandir(self.settings.cust_payload_external): if filename.is_file() and os.path.getsize(filename) > 0: - logger.info("Found external payload file -> %s", filename.path) + logger.info("Found external payload file -> '%s'", filename.path) cust_payload_list.append(filename.path) else: logger.info( - "No external payload file -> %s", self.settings.cust_payload_external + "No external payload file -> '%s'", self.settings.cust_payload_external ) for cust_payload in cust_payload_list: # Open the payload file. If this fails we bail out: - logger.info("Starting processing of payload -> %s", cust_payload) + logger.info("Starting processing of payload -> '%s'", cust_payload) # Set startTime for duration calculation start_time = datetime.now() @@ -1708,11 +1941,13 @@ def customization_run(self): otcs_restart_callback=self.restart_otcs_service, otiv_object=self.otiv_object, m365_object=self.m365_object, + core_share_object=self.core_share_object, browser_automation_object=self.browser_automation_object, placeholder_values=self.settings.placeholder_values, # this dict includes placeholder replacements for the Ressource IDs of OTAWP and OTCS log_header_callback=self.log_header, stop_on_error=self.settings.stop_on_error, aviator_enabled=self.aviator_settings.enabled, + upload_status_files=self.otcs_settings.upload_status_files, ) # Load the payload file and initialize the payload sections: if not payload_object.init_payload(): @@ -1728,50 +1963,53 @@ def customization_run(self): self.consolidate_otds() # Upload payload file for later review to Enterprise Workspace - self.log_header("Upload Payload file to Extended ECM") - response = self.otcs_backend_object.get_node_from_nickname( - self.settings.cust_target_folder_nickname - ) - target_folder_id = self.otcs_backend_object.get_result_value(response, "id") - if not target_folder_id: - target_folder_id = 2000 # use Enterprise Workspace as fallback - # Write YAML file with upadated payload (including IDs, etc.). 
- # We need to write to /tmp as initial location is read-only:
- payload_file = os.path.basename(cust_payload)
- payload_file = (
- payload_file[: -len(".gz.b64")]
- if payload_file.endswith(".gz.b64")
- else payload_file
- )
- cust_payload = "/tmp/" + payload_file
+ if self.otcs_settings.upload_config_files:
+ self.log_header("Upload Payload file to Extended ECM")
+ response = self.otcs_backend_object.get_node_from_nickname(
+ self.settings.cust_target_folder_nickname
+ )
+ target_folder_id = self.otcs_backend_object.get_result_value(
+ response, "id"
+ )
+ if not target_folder_id:
+ target_folder_id = 2000 # use Enterprise Workspace as fallback
+ # Write YAML file with updated payload (including IDs, etc.).
+ # We need to write to /tmp as initial location is read-only:
+ payload_file = os.path.basename(cust_payload)
+ payload_file = (
+ payload_file[: -len(".gz.b64")]
+ if payload_file.endswith(".gz.b64")
+ else payload_file
+ )
+ cust_payload = "/tmp/" + payload_file

- with open(cust_payload, "w", encoding="utf-8") as file:
- yaml.dump(payload_object.get_payload(), file)
+ with open(cust_payload, "w", encoding="utf-8") as file:
+ yaml.dump(payload_object.get_payload(), file)

- # Check if the payload file has been uploaded before.
- # This can happen if we re-run the python container.
- # In this case we add a version to the existing document:
- response = self.otcs_backend_object.get_node_by_parent_and_name(
- int(target_folder_id), os.path.basename(cust_payload)
- )
- target_document_id = self.otcs_backend_object.get_result_value(
- response, "id"
- )
- if target_document_id:
- response = self.otcs_backend_object.add_document_version(
- int(target_document_id),
- cust_payload,
- os.path.basename(cust_payload),
- "text/plain",
- "Updated payload file after re-run of customization",
+ # Check if the payload file has been uploaded before.
+ # This can happen if we re-run the python container.
+ # In this case we add a version to the existing document: + response = self.otcs_backend_object.get_node_by_parent_and_name( + int(target_folder_id), os.path.basename(cust_payload) ) - else: - response = self.otcs_backend_object.upload_file_to_parent( - cust_payload, - os.path.basename(cust_payload), - "text/plain", - int(target_folder_id), + target_document_id = self.otcs_backend_object.get_result_value( + response, "id" ) + if target_document_id: + response = self.otcs_backend_object.add_document_version( + int(target_document_id), + cust_payload, + os.path.basename(cust_payload), + "text/plain", + "Updated payload file after re-run of customization", + ) + else: + response = self.otcs_backend_object.upload_file_to_parent( + cust_payload, + os.path.basename(cust_payload), + "text/plain", + int(target_folder_id), + ) duration = datetime.now() - start_time self.log_header( @@ -1819,9 +2057,11 @@ def customization_run(self): self.otds_object.impersonate_resource(self.otawp_settings.resource_name) # Upload log file for later review to "Deployment" folder in "Administration" folder - if os.path.exists(self.settings.cust_log_file): + if ( + os.path.exists(self.settings.cust_log_file) + and self.otcs_settings.upload_log_file + ): self.log_header("Upload log file to Extended ECM") - # logger.info("========== Upload log file to Extended ECM =============") response = self.otcs_backend_object.get_node_from_nickname( self.settings.cust_target_folder_nickname ) @@ -1839,18 +2079,19 @@ def customization_run(self): ) if target_document_id: response = self.otcs_backend_object.add_document_version( - int(target_document_id), - self.settings.cust_log_file, - os.path.basename(self.settings.cust_log_file), - "text/plain", - "Updated Python Log after re-run of customization", + node_id=int(target_document_id), + file_url=self.settings.cust_log_file, + file_name=os.path.basename(self.settings.cust_log_file), + mime_type="text/plain", + description="Updated Python Log after re-run of customization", ) else: response = self.otcs_backend_object.upload_file_to_parent( - self.settings.cust_log_file, - os.path.basename(self.settings.cust_log_file), - "text/plain", - int(target_folder_id), + file_url=self.settings.cust_log_file, + file_name=os.path.basename(self.settings.cust_log_file), + mime_type="text/plain", + parent_id=int(target_folder_id), + description="Initial Python Log after first run of customization", ) self.settings.customizer_end_time = datetime.now() @@ -1860,29 +2101,4 @@ def customization_run(self): ) ) - -if __name__ == "__main__": - logging.basicConfig( - format="%(asctime)s %(levelname)s [%(name)s] %(message)s", - datefmt="%d-%b-%Y %H:%M:%S", - level=logging.INFO, - handlers=[ - logging.StreamHandler(sys.stdout), - ], - ) - - my_customizer = Customizer( - otcs=CustomizerSettingsOTCS( - hostname="otcs.eng.terrarium.cloud", - hostname_backend="otcs.eng.terrarium.cloud", - hostname_frontend="otcs.eng.terrarium.cloud", - protocol="https", - port_backend=443, - ), - otds=CustomizerSettingsOTDS(hostname="otds.eng.terrarium.cloud"), - otpd=CustomizerSettingsOTPD(enabled=False), - k8s=CustomizerSettingsK8S(enabled=False), - otiv=CustomizerSettingsOTIV(enabled=True), - ) - - my_customizer.customization_run() + # end method definition diff --git a/pyxecm/customizer/k8s.py b/pyxecm/customizer/k8s.py index 89d83fa..795e87d 100644 --- a/pyxecm/customizer/k8s.py +++ b/pyxecm/customizer/k8s.py @@ -263,7 +263,7 @@ def exec_pod_command( if not pod: logger.error("Pod -> %s does not exist", pod_name) - 
logger.info("Execute command -> %s in pod -> %s", command, pod_name) + logger.debug("Execute command -> %s in pod -> %s", command, pod_name) retry_counter = 1 @@ -292,7 +292,7 @@ def exec_pod_command( ) retry_counter = retry_counter + 1 exception = exc - logger.info("Wait %s seconds before next retry...", str(time_retry)) + logger.debug("Wait %s seconds before next retry...", str(time_retry)) time.sleep(time_retry) continue @@ -371,17 +371,17 @@ def exec_pod_command_interactive( got_response = False response.update(timeout=timeout) if response.peek_stdout(): - logger.info(response.read_stdout().replace("\n", " ")) + logger.debug(response.read_stdout().replace("\n", " ")) got_response = True if response.peek_stderr(): if write_stderr_to_error_log: logger.error(response.read_stderr().replace("\n", " ")) else: - logger.info(response.read_stderr().replace("\n", " ")) + logger.debug(response.read_stderr().replace("\n", " ")) got_response = True if commands: command = commands.pop(0) - logger.info("Execute command -> %s in pod -> %s", command, pod_name) + logger.debug("Execute command -> %s in pod -> %s", command, pod_name) response.write_stdin(command + "\n") else: # We continue as long as we get some response during timeout period @@ -888,7 +888,7 @@ def update_ingress_backend_services( backend = path.backend service = backend.service - logger.info( + logger.debug( "Replace backend service -> %s (%s) with new backend service -> %s (%s)", service.name, service.port.number, diff --git a/pyxecm/customizer/m365.py b/pyxecm/customizer/m365.py index 26e806f..482f07a 100644 --- a/pyxecm/customizer/m365.py +++ b/pyxecm/customizer/m365.py @@ -30,6 +30,7 @@ get_user_licenses: Get the assigned license SKUs of a user assign_license_to_user: Add an M365 license to a user (e.g. to use Office 365) get_user_photo: Get the photo of a M365 user +download_user_photo: Download the M365 user photo and save it to the local file system update_user_photo: Update a user with a profile photo (which must be in local file system) get_groups: Get list all all groups in M365 tenant @@ -63,6 +64,7 @@ remove_teams_app: Remove MS Teams App for the app catalog assign_teams_app_to_user: Assign (add) a MS Teams app to a M365 user. upgrade_teams_app_of_user: Upgrade a MS teams app for a user. +remove_teams_app_from_user: Remove a M365 Teams app from a M365 user. assign_teams_app_to_team: Assign (add) a MS Teams app to a M365 team (so that it afterwards can be added as a Tab in a M365 Teams Channel) upgrade_teams_app_of_team: Upgrade a MS teams app for a specific team. @@ -74,6 +76,17 @@ THIS IS CURRENTLY NOT WORKING! assign_sensitivity_label_to_user: Create a new sensitivity label in M365 THIS IS CURRENTLY NOT WORKING! + +upload_outlook_app: Upload the M365 Outlook Add-In as "Integrated" App to M365 Admin Center. (NOT WORKING) +get_app_registration: Find an Azure App Registration based on its name +add_app_registration: Add an Azure App Registration +update_app_registration: Update an Azure App Registration + +get_mail: Get email from inbox of a given user and a given sender (from) +get_mail_body: Get full email body for a given email ID +extract_url_from_message_body: Parse the email body to extract (a potentially multi-line) URL from the body. +delete_mail: Delete email from inbox of a given user and a given email ID. +email_verification: Process email verification """ __author__ = "Dr. 
Marc Diefenbruch" @@ -90,9 +103,13 @@ import urllib.parse import zipfile from urllib.parse import quote +from datetime import datetime import requests +from pyxecm.helper.web import HTTP +from pyxecm.customizer.browser_automation import BrowserAutomation + logger = logging.getLogger("pyxecm.customizer.m365") request_login_headers = { @@ -100,6 +117,7 @@ "Accept": "application/json", } +REQUEST_TIMEOUT = 60 class M365(object): """Used to automate stettings in Microsoft 365 via the Graph API.""" @@ -107,6 +125,7 @@ class M365(object): _config: dict _access_token = None _user_access_token = None + _http_object: HTTP | None = None def __init__( self, @@ -116,6 +135,7 @@ def __init__( domain: str, sku_id: str, teams_app_name: str, + teams_app_external_id: str, ): """Initialize the M365 object @@ -126,6 +146,7 @@ def __init__( domain (str): M365 domain sku_id (str): License SKU for M365 users teams_app_name (str): name of the Extended ECM app for MS Teams + teams_app_external_id (str): external ID of the Extended ECM app for MS Teams """ m365_config = {} @@ -137,6 +158,10 @@ def __init__( m365_config["domain"] = domain m365_config["skuId"] = sku_id m365_config["teamsAppName"] = teams_app_name + m365_config["teamsAppExternalId"] = ( + teams_app_external_id # this is the external App ID + ) + m365_config["teamsAppInternalId"] = None # will be set later... m365_config[ "authenticationUrl" ] = "https://login.microsoftonline.com/{}/oauth2/v2.0/token".format(tenant_id) @@ -162,6 +187,7 @@ def __init__( m365_config["applicationsUrl"] = m365_config["graphUrl"] + "applications" self._config = m365_config + self._http_object = HTTP() def config(self) -> dict: """Returns the configuration dictionary @@ -396,7 +422,7 @@ def authenticate(self, revalidate: bool = False) -> str | None: # Already authenticated and session still valid? 
if self._access_token and not revalidate: - logger.info( + logger.debug( "Session still valid - return existing access token -> %s", str(self._access_token), ) @@ -405,7 +431,7 @@ def authenticate(self, revalidate: bool = False) -> str | None: request_url = self.config()["authenticationUrl"] request_header = request_login_headers - logger.info("Requesting M365 Access Token from -> %s", request_url) + logger.debug("Requesting M365 Access Token from -> %s", request_url) authenticate_post_body = self.credentials() authenticate_response = None @@ -415,7 +441,7 @@ def authenticate(self, revalidate: bool = False) -> str | None: request_url, data=authenticate_post_body, headers=request_header, - timeout=60, + timeout=REQUEST_TIMEOUT, ) except requests.exceptions.ConnectionError as exception: logger.warning( @@ -459,7 +485,7 @@ def authenticate_user(self, username: str, password: str) -> str | None: request_url = self.config()["authenticationUrl"] request_header = request_login_headers - logger.info( + logger.debug( "Requesting M365 Access Token for user -> %s from -> %s", username, request_url, @@ -473,7 +499,7 @@ def authenticate_user(self, username: str, password: str) -> str | None: request_url, data=authenticate_post_body, headers=request_header, - timeout=60, + timeout=REQUEST_TIMEOUT, ) except requests.exceptions.ConnectionError as exception: logger.warning( @@ -515,16 +541,18 @@ def get_users(self) -> dict | None: request_url = self.config()["usersUrl"] request_header = self.request_header() - logger.info("Get list of all users; calling -> %s", request_url) + logger.debug("Get list of all users; calling -> %s", request_url) retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -573,19 +601,46 @@ def get_user(self, user_email: str, show_error: bool = False) -> dict | None: } """ + # Some sanity checks: + if "@" not in user_email or "." not in user_email: + logger.error("User email -> %s is not a valid email address", user_email) + return None + + # If there's an alias in the email address, we remove it, as + # MS Graph does not seem to support aliases when looking up a user object.
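+        # Worked example (hypothetical address): "jdoe+crm@contoso.com" is
+        # normalized to "jdoe@contoso.com" - the characters from '+' up to
+        # (but not including) '@' are cut out before the Graph lookup.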
+ if "+" in user_email: + logger.info( + "Removing Alias from email address -> %s to determine M365 principal name...", + user_email, + ) + # Find the index of the '+' character + alias_index = user_email.find("+") + + # Find the index of the '@' character + domain_index = user_email.find("@") + + # Construct the email address without the alias + user_email = user_email[:alias_index] + user_email[domain_index:] + logger.info( + "M365 user principal name -> %s", + user_email, + ) + request_url = self.config()["usersUrl"] + "/" + user_email request_header = self.request_header() - logger.info("Get M365 user -> %s; calling -> %s", user_email, request_url) + logger.debug("Get M365 user -> %s; calling -> %s", user_email, request_url) retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -606,7 +661,7 @@ def get_user(self, user_email: str, show_error: bool = False) -> dict | None: response.text, ) else: - logger.info("M365 User -> %s not found.", user_email) + logger.debug("M365 User -> %s not found.", user_email) return None # end method definition @@ -657,7 +712,7 @@ def add_user( request_url = self.config()["usersUrl"] request_header = self.request_header() - logger.info("Adding M365 user -> %s; calling -> %s", email, request_url) + logger.debug("Adding M365 user -> %s; calling -> %s", email, request_url) retries = 0 while True: @@ -665,13 +720,13 @@ def add_user( request_url, data=json.dumps(user_post_body), headers=request_header, - timeout=60, + timeout=REQUEST_TIMEOUT, ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -698,6 +753,9 @@ def update_user(self, user_id: str, updated_settings: dict) -> dict | None: """Update selected properties of an M365 user. Documentation on user properties is here: https://learn.microsoft.com/en-us/graph/api/user-update + Args: + user_id (str): ID of the user (can also be email). This is also the unique identifier + updated_settings (dict): new data to update the user with Returns: dict | None: Response of the M365 Graph API or None if the call fails. 
""" @@ -705,8 +763,8 @@ def update_user(self, user_id: str, updated_settings: dict) -> dict | None: request_url = self.config()["usersUrl"] + "/" + user_id request_header = self.request_header() - logger.info( - "Updating M365 user -> %s with -> %s; calling -> %s", + logger.debug( + "Updating M365 user with ID -> %s with -> %s; calling -> %s", user_id, str(updated_settings), request_url, @@ -718,13 +776,13 @@ def update_user(self, user_id: str, updated_settings: dict) -> dict | None: request_url, json=updated_settings, headers=request_header, - timeout=60, + timeout=REQUEST_TIMEOUT, ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -775,12 +833,14 @@ def get_user_licenses(self, user_id: str) -> dict | None: retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -830,7 +890,7 @@ def assign_license_to_user(self, user_id: str, sku_id: str) -> dict | None: "removeLicenses": [], } - logger.info( + logger.debug( "Assign M365 license -> %s to M365 user -> %s; calling -> %s", sku_id, user_id, @@ -840,13 +900,16 @@ def assign_license_to_user(self, user_id: str, sku_id: str) -> dict | None: retries = 0 while True: response = requests.post( - request_url, json=license_post_body, headers=request_header, timeout=60 + request_url, + json=license_post_body, + headers=request_header, + timeout=REQUEST_TIMEOUT, ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -885,16 +948,18 @@ def get_user_photo(self, user_id: str, show_error: bool = True) -> bytes | None: # Set image as content type: request_header = self.request_header("image/*") - logger.info("Get photo of user -> %s; calling -> %s", user_id, request_url) + logger.debug("Get photo of user -> %s; calling -> %s", user_id, request_url) retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return response.content # this is the actual image - not json! 
# Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -915,7 +980,88 @@ def get_user_photo(self, user_id: str, show_error: bool = True) -> bytes | None: response.text, ) else: - logger.info("User -> %s does not yet have a photo.", user_id) + logger.debug("M365 User -> %s does not yet have a photo.", user_id) + return None + + # end method definition + + def download_user_photo(self, user_id: str, photo_path: str) -> str | None: + """Download the M365 user photo and save it to the local file system + + Args: + user_id (str): M365 GUID of the user (can also be the M365 email of the user) + photo_path (str): Directory where the photo should be saved + Returns: + str | None: name of the photo file in the file system (with full path) or None if + the call of the REST API fails. + """ + + request_url = self.config()["usersUrl"] + "/" + user_id + "/photo/$value" + request_header = self.request_header("application/json") + + logger.debug( + "Downloading photo for M365 user with ID -> %s; calling -> %s", + user_id, + request_url, + ) + + retries = 0 + while True: + response = requests.get( + request_url, + headers=request_header, + timeout=REQUEST_TIMEOUT, + stream=True, + ) + if response.ok: + content_type = response.headers.get("Content-Type", "image/png") + if content_type == "image/jpeg": + file_extension = "jpg" + elif content_type == "image/png": + file_extension = "png" + else: + file_extension = "img" # Default extension if type is unknown + file_path = os.path.join( + photo_path, "{}.{}".format(user_id, file_extension) + ) + + try: + with open(file_path, "wb") as file: + for chunk in response.iter_content(chunk_size=8192): + file.write(chunk) + logger.info( + "Photo for M365 user with ID -> %s saved to %s", + user_id, + file_path, + ) + return file_path + except OSError as exception: + logger.error( + "Error saving photo for user with ID -> %s; error -> %s", + user_id, + exception, + ) + return None + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + request_header = self.request_header("application/json") + retries += 1 + elif response.status_code in [502, 503, 504] and retries < 3: + logger.warning( + "M365 Graph API delivered server side error -> %s; retrying in %s seconds...", + response.status_code, + (retries + 1) * 60, + ) + time.sleep((retries + 1) * 60) + retries += 1 + else: + logger.error( + "Failed to download photo for user with ID -> %s; status -> %s; error -> %s", + user_id, + response.status_code, + response.text, + ) return None # end method definition @@ -952,8 +1098,8 @@ def update_user_photo(self, user_id: str, photo_path: str) -> dict | None: data = photo_data - logger.info( - "Update M365 user -> %s with photo -> %s; calling -> %s", + logger.debug( + "Update M365 user with ID -> %s with photo -> %s; calling -> %s", user_id, photo_path, request_url, ) retries = 0 while True: response = requests.put( - request_url, headers=request_header, data=data, timeout=60 + request_url, headers=request_header, data=data, timeout=REQUEST_TIMEOUT ) if response.ok: return self.parse_request_response(response) #
Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -982,7 +1128,7 @@ def update_user_photo(self, user_id: str, photo_path: str) -> dict | None: retries += 1 else: logger.error( - "Failed to update user -> %s with photo -> %s; status -> %s; error -> %s", + "Failed to update user with ID -> %s with photo -> %s; status -> %s; error -> %s", user_id, photo_path, response.status_code, @@ -1004,7 +1150,7 @@ def get_groups(self, max_number: int = 250) -> dict | None: request_url = self.config()["groupsUrl"] request_header = self.request_header() - logger.info("Get list of all M365 groups; calling -> %s", request_url) + logger.debug("Get list of all M365 groups; calling -> %s", request_url) retries = 0 while True: @@ -1012,13 +1158,13 @@ def get_groups(self, max_number: int = 250) -> dict | None: request_url, headers=request_header, params={"$top": str(max_number)}, - timeout=60, + timeout=REQUEST_TIMEOUT, ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1101,16 +1247,18 @@ def get_group(self, group_name: str, show_error: bool = False) -> dict | None: request_url = self.config()["groupsUrl"] + "?" + encoded_query request_header = self.request_header() - logger.info("Get M365 group -> %s; calling -> %s", group_name, request_url) + logger.debug("Get M365 group -> %s; calling -> %s", group_name, request_url) retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1131,7 +1279,7 @@ def get_group(self, group_name: str, show_error: bool = False) -> dict | None: response.text, ) else: - logger.info("M365 Group -> %s not found.", group_name) + logger.debug("M365 Group -> %s not found.", group_name) return None # end method definition @@ -1197,7 +1345,7 @@ def add_group( request_url = self.config()["groupsUrl"] request_header = self.request_header() - logger.info("Adding M365 group -> %s; calling -> %s", name, request_url) + logger.debug("Adding M365 group -> %s; calling -> %s", name, request_url) logger.debug("M365 group attributes -> %s", group_post_body) retries = 0 @@ -1206,13 +1354,13 @@ def add_group( request_url, data=json.dumps(group_post_body), headers=request_header, - timeout=60, + timeout=REQUEST_TIMEOUT, ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has 
expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1261,7 +1409,7 @@ def get_group_members(self, group_name: str) -> dict | None: ) request_header = self.request_header() - logger.info( + logger.debug( "Get members of M365 group -> %s (%s); calling -> %s", group_name, group_id, @@ -1270,12 +1418,14 @@ def get_group_members(self, group_name: str) -> dict | None: retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1316,7 +1466,7 @@ def add_group_member(self, group_id: str, member_id: str) -> dict | None: "@odata.id": self.config()["directoryObjects"] + "/" + member_id } - logger.info( + logger.debug( "Adding member -> %s to group -> %s; calling -> %s", member_id, group_id, @@ -1329,14 +1479,14 @@ def add_group_member(self, group_id: str, member_id: str) -> dict | None: request_url, headers=request_header, data=json.dumps(group_member_post_body), - timeout=60, + timeout=REQUEST_TIMEOUT, ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more if response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1379,7 +1529,7 @@ def is_member(self, group_id: str, member_id: str, show_error: bool = True) -> b ) request_header = self.request_header() - logger.info( + logger.debug( "Check if user -> %s is in group -> %s; calling -> %s", member_id, group_id, @@ -1388,7 +1538,9 @@ def is_member(self, group_id: str, member_id: str, show_error: bool = True) -> b retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: response = self.parse_request_response(response) if not "value" in response or len(response["value"]) == 0: @@ -1396,7 +1548,7 @@ def is_member(self, group_id: str, member_id: str, show_error: bool = True) -> b return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1449,7 +1601,7 @@ def get_group_owners(self, group_name: str) -> dict | None: ) request_header = self.request_header() - logger.info( + logger.debug( "Get owners of M365 group -> %s (%s); calling -> %s", group_name, group_id, @@ -1458,12 +1610,14 @@ def get_group_owners(self, group_name: str) -> dict | None: retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = 
requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1504,7 +1658,7 @@ def add_group_owner(self, group_id: str, owner_id: str) -> dict | None: "@odata.id": self.config()["directoryObjects"] + "/" + owner_id } - logger.info( + logger.debug( "Adding owner -> %s to M365 group -> %s; calling -> %s", owner_id, group_id, @@ -1517,13 +1671,13 @@ def add_group_owner(self, group_id: str, owner_id: str) -> dict | None: request_url, headers=request_header, data=json.dumps(group_member_post_body), - timeout=60, + timeout=REQUEST_TIMEOUT, ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1558,7 +1712,9 @@ def purge_deleted_items(self): request_url = ( self.config()["directoryUrl"] + "/deletedItems/microsoft.graph.group" ) - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) deleted_groups = self.parse_request_response(response) for group in deleted_groups["value"]: @@ -1568,7 +1724,9 @@ def purge_deleted_items(self): request_url = ( self.config()["directoryUrl"] + "/deletedItems/microsoft.graph.user" ) - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) deleted_users = self.parse_request_response(response) for user in deleted_users["value"]: @@ -1591,16 +1749,18 @@ def purge_deleted_item(self, item_id: str) -> dict | None: request_url = self.config()["directoryUrl"] + "/deletedItems/" + item_id request_header = self.request_header() - logger.info("Purging deleted item -> %s; calling -> %s", item_id, request_url) + logger.debug("Purging deleted item -> %s; calling -> %s", item_id, request_url) retries = 0 while True: - response = requests.delete(request_url, headers=request_header, timeout=60) + response = requests.delete( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1644,7 +1804,7 @@ def has_team(self, group_name: str) -> bool: request_url = self.config()["groupsUrl"] + "/" + group_id + "/team" request_header = self.request_header() - logger.info( + logger.debug( "Check if M365 Group -> %s has a M365 Team connected; calling -> %s", group_name, request_url, @@ -1652,16 +1812,18 @@ def has_team(self, group_name: str) -> bool: retries = 0 while True: - response = 
requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.status_code == 200: # Group has a Team assigned! - logger.info("Group -> %s has a M365 Team connected.", group_name) + logger.debug("Group -> %s has a M365 Team connected.", group_name) return True elif response.status_code == 404: # Group does not have a Team assigned! - logger.info("Group -> %s has no M365 Team connected.", group_name) + logger.debug("Group -> %s has no M365 Team connected.", group_name) return False elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1734,20 +1896,22 @@ def get_team(self, name: str) -> dict | None: request_header = self.request_header() - logger.info( - "Lookup Microsoft 365 Teams with name -> %s; calling -> %s", + logger.debug( + "Lookup Microsoft 365 Teams with name -> '%s'; calling -> %s", name, request_url, ) retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1785,7 +1949,7 @@ def add_team(self, name: str, template_name: str = "standard") -> dict | None: group_id = self.get_result_value(response, "id", 0) if not group_id: logger.error( - "M365 Group -> %s not found. It is required for creating a corresponding M365 Team.", + "M365 Group -> '%s' not found. It is required for creating a corresponding M365 Team.", name, ) return None @@ -1793,7 +1957,7 @@ def add_team(self, name: str, template_name: str = "standard") -> dict | None: response = self.get_group_owners(name) if response is None or not "value" in response or not response["value"]: logger.warning( - "M365 Group -> %s has no owners. This is required for creating a corresponding M365 Team.", + "M365 Group -> '%s' has no owners. 
This is required for creating a corresponding M365 Team.", name, ) return None @@ -1808,7 +1972,7 @@ def add_team(self, name: str, template_name: str = "standard") -> dict | None: request_url = self.config()["teamsUrl"] request_header = self.request_header() - logger.info("Adding M365 Team -> %s; calling -> %s", name, request_url) + logger.debug("Adding M365 Team -> %s; calling -> %s", name, request_url) logger.debug("M365 Team attributes -> %s", team_post_body) retries = 0 @@ -1817,13 +1981,13 @@ def add_team(self, name: str, template_name: str = "standard") -> dict | None: request_url, data=json.dumps(team_post_body), headers=request_header, - timeout=60, + timeout=REQUEST_TIMEOUT, ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1837,7 +2001,7 @@ def add_team(self, name: str, template_name: str = "standard") -> dict | None: retries += 1 else: logger.error( - "Failed to add M365 Team -> %s; status -> %s; error -> %s", + "Failed to add M365 Team -> '%s'; status -> %s; error -> %s", name, response.status_code, response.text, @@ -1859,7 +2023,7 @@ def delete_team(self, team_id: str) -> dict | None: request_header = self.request_header() - logger.info( + logger.debug( "Delete Microsoft 365 Teams with ID -> %s; calling -> %s", team_id, request_url, @@ -1867,12 +2031,14 @@ def delete_team(self, team_id: str) -> dict | None: retries = 0 while True: - response = requests.delete(request_url, headers=request_header, timeout=60) + response = requests.delete( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1911,21 +2077,23 @@ def delete_teams(self, name: str) -> bool: request_header = self.request_header() - logger.info( - "Delete all Microsoft 365 Teams with name -> %s; calling -> %s", + logger.debug( + "Delete all Microsoft 365 Teams with name -> '%s'; calling -> %s", name, request_url, ) retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: existing_teams = self.parse_request_response(response) break # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1962,16 +2130,16 @@ def delete_teams(self, name: str) -> bool: counter += 1 logger.info( - "%s M365 Teams with name -> %s have been deleted.", + "%s M365 Teams with name -> '%s' have been deleted.", str(counter), name, ) return True else: - logger.info("No M365 Teams with name -> %s found.", name) + logger.info("No M365 Teams with 
name -> '%s' found.", name) return False else: - logger.error("Failed to retrieve M365 Teams with name -> %s", name) + logger.error("Failed to retrieve M365 Teams with name -> '%s'", name) return False # end method definition @@ -1995,19 +2163,20 @@ def delete_all_teams(self, exception_list: list, pattern_list: list) -> bool: if not "value" in response or not response["value"]: return False groups = response["value"] + logger.info( "Found -> %s existing M365 groups. Checking which ones should be deleted...", len(groups), ) - # Process all groups and check if the< should be + # Process all groups and check if they should be # deleted: for group in groups: group_name = group["displayName"] # Check if group is in exception list: if group_name in exception_list: logger.info( - "M365 Group name -> %s is on the exception list. Skipping...", + "M365 Group name -> '%s' is on the exception list. Skipping...", group_name, ) continue @@ -2016,7 +2185,7 @@ def delete_all_teams(self, exception_list: list, pattern_list: list) -> bool: result = re.search(pattern, group_name) if result: logger.info( - "M365 Group name -> %s is matching pattern -> %s. Delete it now...", + "M365 Group name -> '%s' is matching pattern -> %s. Delete it now...", group_name, pattern, ) @@ -2024,7 +2193,7 @@ def delete_all_teams(self, exception_list: list, pattern_list: list) -> bool: break else: logger.info( - "M365 Group name -> %s is not matching any delete pattern. Skipping...", + "M365 Group name -> '%s' is not matching any delete pattern. Skipping...", group_name, ) return True @@ -2068,20 +2237,22 @@ def get_team_channels(self, name: str) -> dict | None: request_header = self.request_header() - logger.info( - "Retrieve channels of Microsoft 365 Team -> %s; calling -> %s", + logger.debug( + "Retrieve channels of Microsoft 365 Team -> '%s'; calling -> %s", name, request_url, ) retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -2151,7 +2322,7 @@ def get_team_channel_tabs(self, team_name: str, channel_name: str) -> dict | Non None, ) if not channel: - logger.erro( + logger.error( "Cannot find Channel -> %s on M365 Team -> %s", channel_name, team_name ) return None @@ -2168,7 +2339,7 @@ def get_team_channel_tabs(self, team_name: str, channel_name: str) -> dict | Non request_header = self.request_header() - logger.info( + logger.debug( "Retrieve Tabs of Microsoft 365 Teams -> %s and Channel -> %s; calling -> %s", team_name, channel_name, @@ -2177,12 +2348,14 @@ def get_team_channel_tabs(self, team_name: str, channel_name: str) -> dict | Non retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + 
logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -2257,24 +2430,26 @@ def get_teams_apps(self, filter_expression: str = "") -> dict | None: request_url = self.config()["teamsAppsUrl"] + "?" + encoded_query if filter_expression: - logger.info( + logger.debug( "Get list of MS Teams Apps using filter -> %s; calling -> %s", filter_expression, request_url, ) else: - logger.info("Get list of all MS Teams Apps; calling -> %s", request_url) + logger.debug("Get list of all MS Teams Apps; calling -> %s", request_url) request_header = self.request_header() retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -2297,34 +2472,58 @@ def get_teams_apps(self, filter_expression: str = "") -> dict | None: # end method definition def get_teams_app(self, app_id: str) -> dict | None: - """Get a specific MS Teams app in catalog based on the known app ID + """Get a specific MS Teams app in catalog based on the known (internal) app ID Args: - app_id (str): ID of the app + app_id (str): ID of the app (this is NOT the external ID but the internal ID) Returns: dict: response of the MS Graph API call or None if the call fails. + + Examle response: + { + '@odata.context': 'https://graph.microsoft.com/v1.0/$metadata#appCatalogs/teamsApps(appDefinitions())/$entity', + 'id': 'ccabe3fb-316f-40e0-a486-1659682cb8cd', + 'externalId': 'dd4af790-d8ff-47a0-87ad-486318272c7a', + 'displayName': 'Extended ECM', + 'distributionMethod': 'organization', + 'appDefinitions@odata.context': "https://graph.microsoft.com/v1.0/$metadata#appCatalogs/teamsApps('ccabe3fb-316f-40e0-a486-1659682cb8cd')/appDefinitions", + 'appDefinitions': [ + { + 'id': 'Y2NhYmUzZmItMzE2Zi00MGUwLWE0ODYtMTY1OTY4MmNiOGNkIyMyNC4yLjAjI1B1Ymxpc2hlZA==', + 'teamsAppId': 'ccabe3fb-316f-40e0-a486-1659682cb8cd', + 'displayName': 'Extended ECM', + 'version': '24.2.0', + 'publishingState': 'published', + 'shortDescription': 'Add a tab for an Extended ECM business workspace.', + 'description': 'View and interact with OpenText Extended ECM business workspaces', + 'lastModifiedDateTime': None, + 'createdBy': None, + 'authorization': {...} + } + ] + } """ query = {"$expand": "AppDefinitions"} encoded_query = urllib.parse.urlencode(query, doseq=True) request_url = self.config()["teamsAppsUrl"] + "/" + app_id + "?" 
+ encoded_query - # request_url = self.config()["teamsAppsUrl"] + "/" + app_id - - logger.info( - "Get MS Teams App with ID -> %s; calling -> %s", app_id, request_url + logger.debug( + "Get M365 Teams App with ID -> %s; calling -> %s", app_id, request_url ) request_header = self.request_header() retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -2370,7 +2569,8 @@ def get_teams_apps_of_user( + "/teamwork/installedApps?" + encoded_query ) - logger.info( + + logger.debug( "Get list of M365 Teams Apps for user -> %s using query -> %s; calling -> %s", user_id, query, @@ -2381,12 +2581,14 @@ def get_teams_apps_of_user( retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -2433,7 +2635,8 @@ def get_teams_apps_of_team( + "/installedApps?" + encoded_query ) - logger.info( + + logger.debug( "Get list of M365 Teams Apps for M365 Team -> %s using query -> %s; calling -> %s", team_id, query, @@ -2444,12 +2647,14 @@ def get_teams_apps_of_team( retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -2501,6 +2706,7 @@ def upload_teams_app( but requires a token of a user authenticated with username + password. See https://learn.microsoft.com/en-us/graph/api/teamsapp-publish (permissions table on that page) + For updates see: https://learn.microsoft.com/en-us/graph/api/teamsapp-update?view=graph-rest-1.0&tabs=http Args: app_path (str): file path (with directory) to the app package to upload @@ -2511,6 +2717,34 @@ def upload_teams_app( after installation (which is tenant specific) Returns: dict: Response of the MS GRAPH API REST call or None if the request fails + + The responses differ depending on whether it is a new installation or an upgrade!
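+        (In the upgrade response the internal catalog ID is returned as
+        "teamsAppId", while in the new-installation response it is returned as
+        "id"; "externalId" is the ID that comes from the app's manifest.json.)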
+ + Example return for upgrades ("teamsAppId" is the "internal" ID of the app): + { + '@odata.context': "https://graph.microsoft.com/v1.0/$metadata#appCatalogs/teamsApps('3f749cca-8cb0-4925-9fa0-ba7aca2014af')/appDefinitions/$entity", + 'id': 'M2Y3NDljY2EtOGNiMC00OTI1LTlmYTAtYmE3YWNhMjAxNGFmIyMyNC4yLjAjI1B1Ymxpc2hlZA==', + 'teamsAppId': '3f749cca-8cb0-4925-9fa0-ba7aca2014af', + 'displayName': 'IDEA-TE - Extended ECM 24.2.0', + 'version': '24.2.0', + 'publishingState': 'published', + 'shortDescription': 'Add a tab for an Extended ECM business workspace.', + 'description': 'View and interact with OpenText Extended ECM business workspaces', + 'lastModifiedDateTime': None, + 'createdBy': None, + 'authorization': { + 'requiredPermissionSet': {...} + } + } + + Example return for new installations ("id" is the "internal" ID of the app): + { + '@odata.context': 'https://graph.microsoft.com/v1.0/$metadata#appCatalogs/teamsApps/$entity', + 'id': '6c672afd-37fc-46c6-8365-d499aba3808b', + 'externalId': 'dd4af790-d8ff-47a0-87ad-486318272c7a', + 'displayName': 'OpenText Extended ECM', + 'distributionMethod': 'organization' + } """ if update_existing_app and not app_catalog_id: @@ -2520,12 +2754,12 @@ def upload_teams_app( return None if not os.path.exists(app_path): - logger.error("M365 Teams app file -> {} does not exist!") + logger.error("M365 Teams app file -> %s does not exist!", app_path) return None # Ensure that the app file is a zip file if not app_path.endswith(".zip"): - logger.error("M365 Teams app file -> {} must be a zip file!") + logger.error("M365 Teams app file -> %s must be a zip file!", app_path) return None request_url = self.config()["teamsAppsUrl"] @@ -2545,12 +2779,13 @@ def upload_teams_app( # Ensure that the app file contains a manifest.json file if "manifest.json" not in z.namelist(): logger.error( - "M365 Teams app file -> {} does not contain a manifest.json file!" 
+ "M365 Teams app file -> '%s' does not contain a manifest.json file!", + app_path, ) return None - logger.info( - "Upload M365 Teams app -> %s to the MS Teams catalog; calling -> %s", + logger.debug( + "Upload M365 Teams app -> '%s' to the MS Teams catalog; calling -> %s", app_path, request_url, ) @@ -2558,14 +2793,17 @@ def upload_teams_app( retries = 0 while True: response = requests.post( - request_url, headers=request_header, data=app_data, timeout=60 + request_url, + headers=request_header, + data=app_data, + timeout=REQUEST_TIMEOUT, ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more if response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -2580,7 +2818,7 @@ def upload_teams_app( else: if update_existing_app: logger.warning( - "Failed to update existing M365 Teams app -> %s (may be because it is not a new version); status -> %s; error -> %s", + "Failed to update existing M365 Teams app -> '%s' (may be because it is not a new version); status -> %s; error -> %s", app_path, response.status_code, response.text, @@ -2588,7 +2826,7 @@ def upload_teams_app( else: logger.error( - "Failed to upload new M365 Teams app -> %s; status -> %s; error -> %s", + "Failed to upload new M365 Teams app -> '%s'; status -> %s; error -> %s", app_path, response.status_code, response.text, @@ -2610,11 +2848,13 @@ def remove_teams_app(self, app_id: str): request_header = self.request_header_user() # Make the DELETE request to remove the app from the app catalog - response = requests.delete(request_url, headers=request_header, timeout=60) + response = requests.delete( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) # Check the status code of the response if response.status_code == 204: - logger.info( + logger.debug( "The M365 Teams app with ID -> %s has been successfully removed from the app catalog.", app_id, ) @@ -2627,35 +2867,160 @@ def remove_teams_app(self, app_id: str): # end method definition - def assign_teams_app_to_user(self, user_id: str, app_name: str) -> dict | None: + def assign_teams_app_to_user( + self, + user_id: str, + app_name: str = "", + app_internal_id: str = "", + show_error: bool = False, + ) -> dict | None: """Assigns (adds) a M365 Teams app to a M365 user. + See: https://learn.microsoft.com/en-us/graph/api/userteamwork-post-installedapps?view=graph-rest-1.0&tabs=http + Args: user_id (str): M365 GUID of the user (can also be the M365 email of the user) - app_name (str): exact name of the app + app_name (str, optional): exact name of the app. Not needed if app_internal_id is provided + app_internal_id (str, optional): internal ID of the app. If not provided it will be derived from app_name + show_error (bool): whether or not an error should be displayed if the + user is not found. Returns: dict: response of the MS Graph API call or None if the call fails. """ - response = self.get_teams_apps(f"contains(displayName, '{app_name}')") - app_id = self.get_result_value(response, "id", 0) - if not app_id: - logger.error("M365 Teams App -> %s not found!", app_name) + if not app_internal_id and not app_name: + logger.error( + "Either the internal App ID or the App name need to be provided!" 
+ ) return None + if not app_internal_id: + response = self.get_teams_apps( + filter_expression="contains(displayName, '{}')".format(app_name) + ) + app_internal_id = self.get_result_value( + response=response, key="id", index=0 + ) + if not app_internal_id: + logger.error( + "M365 Teams App -> '%s' not found! Cannot assign App to user -> %s.", + app_name, + user_id, + ) + return None + request_url = ( self.config()["usersUrl"] + "/" + user_id + "/teamwork/installedApps" ) request_header = self.request_header() post_body = { - "teamsApp@odata.bind": self.config()["teamsAppsUrl"] + "/" + app_id + "teamsApp@odata.bind": self.config()["teamsAppsUrl"] + "/" + app_internal_id } - logger.info( - "Assign M365 Teams app -> %s (%s) to M365 user -> %s; calling -> %s", + logger.debug( + "Assign M365 Teams app -> '%s' (%s) to M365 user -> %s; calling -> %s", app_name, - app_id, + app_internal_id, user_id, request_url, ) + + retries = 0 + while True: + response = requests.post( + request_url, + json=post_body, + headers=request_header, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + request_header = self.request_header() + retries += 1 + elif response.status_code in [502, 503, 504] and retries < 3: + logger.warning( + "M365 Graph API delivered server side error -> %s; retrying in %s seconds...", + response.status_code, + (retries + 1) * 60, + ) + time.sleep((retries + 1) * 60) + retries += 1 + else: + if show_error: + logger.error( + "Failed to assign M365 Teams app -> '%s' (%s) to M365 user -> %s; status -> %s; error -> %s", + app_name, + app_internal_id, + user_id, + response.status_code, + response.text, + ) + else: + logger.warning( + "Failed to assign M365 Teams app -> '%s' (%s) to M365 user -> %s (could be because the app is assigned organization-wide); status -> %s; warning -> %s", + app_name, + app_internal_id, + user_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def upgrade_teams_app_of_user( + self, user_id: str, app_name: str, app_installation_id: str | None = None + ) -> dict | None: + """Upgrade a MS teams app for a user. The call will fail if the user does not + already have the app assigned. So this needs to be checked before + calling this method. + See: https://learn.microsoft.com/en-us/graph/api/userteamwork-teamsappinstallation-upgrade?view=graph-rest-1.0&tabs=http + + Args: + user_id (str): M365 GUID of the user (can also be the M365 email of the user) + app_name (str): exact name of the app + app_installation_id (str, optional): ID of the app installation for the user. This is neither the internal nor + external app ID. It is specific for each user and app. + Returns: + dict: response of the MS Graph API call or None if the call fails. + """ + + if not app_installation_id: + response = self.get_teams_apps_of_user( + user_id=user_id, + filter_expression="contains(teamsAppDefinition/displayName, '{}')".format( + app_name + ), + ) + # Retrieve the installation-specific App ID - this is different from the App catalog ID! + app_installation_id = self.get_result_value(response, "id", 0) + if not app_installation_id: + logger.error( + "M365 Teams app -> '%s' not found for user with ID -> %s.
Cannot upgrade app for this user!", + app_name, + user_id, + ) + return None + + request_url = ( + self.config()["usersUrl"] + + "/" + + user_id + + "/teamwork/installedApps/" + + app_installation_id + + "/upgrade" + ) + request_header = self.request_header() + + logger.debug( + "Upgrade M365 Teams app -> '%s' (%s) of M365 user with ID -> %s; calling -> %s", + app_name, + app_installation_id, user_id, request_url, ) @@ -2663,13 +3028,13 @@ retries = 0 while True: response = requests.post( - request_url, json=post_body, headers=request_header, timeout=60 + request_url, headers=request_header, timeout=REQUEST_TIMEOUT ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -2683,9 +3048,9 @@ retries += 1 else: logger.error( - "Failed to assign M365 Teams app -> %s (%s) to M365 user -> %s; status -> %s; error -> %s", + "Failed to upgrade M365 Teams app -> '%s' (%s) of M365 user -> %s; status -> %s; error -> %s", app_name, - app_id, + app_installation_id, user_id, response.status_code, response.text, @@ -2694,10 +3059,12 @@ # end method definition - def upgrade_teams_app_of_user(self, user_id: str, app_name: str) -> dict | None: - """Upgrade a MS teams app for a user. The call will fail if the user does not - already have the app assigned. So this needs to be checked before - calling this method. + def remove_teams_app_from_user( + self, user_id: str, app_name: str, app_installation_id: str | None = None + ) -> dict | None: + """Remove a M365 Teams app from a M365 user. + + See: https://learn.microsoft.com/en-us/graph/api/userteamwork-delete-installedapps?view=graph-rest-1.0&tabs=http Args: user_id (str): M365 GUID of the user (can also be the M365 email of the user) @@ -2706,14 +3073,18 @@ dict: response of the MS Graph API call or None if the call fails. """ - response = self.get_teams_apps_of_user( - user_id, "contains(teamsAppDefinition/displayName, '{}')".format(app_name) - ) - # Retrieve the installation specific App ID - this is different from thew App catalalog ID!! - app_installation_id = self.get_result_value(response, "id", 0) + if not app_installation_id: + response = self.get_teams_apps_of_user( + user_id=user_id, + filter_expression="contains(teamsAppDefinition/displayName, '{}')".format( + app_name + ), + ) + # Retrieve the installation-specific App ID - this is different from the App catalog ID! + app_installation_id = self.get_result_value(response, "id", 0) if not app_installation_id: logger.error( - "M365 Teams app -> %s not found for user with ID -> %s. Cannot upgrade app for this user!", + "M365 Teams app -> '%s' not found for user with ID -> %s.
Cannot remove app from this user!", app_name, user_id, ) @@ -2725,12 +3096,11 @@ def upgrade_teams_app_of_user(self, user_id: str, app_name: str) -> dict | None: + user_id + "/teamwork/installedApps/" + app_installation_id - + "/upgrade" ) request_header = self.request_header() - logger.info( - "Upgrade M365 Teams app -> %s (%s) of M365 user with ID -> %s; calling -> %s", + logger.debug( + "Remove M365 Teams app -> '%s' (%s) from M365 user with ID -> %s; calling -> %s", app_name, app_installation_id, user_id, @@ -2739,12 +3109,14 @@ def upgrade_teams_app_of_user(self, user_id: str, app_name: str) -> dict | None: retries = 0 while True: - response = requests.post(request_url, headers=request_header, timeout=60) + response = requests.delete( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -2758,7 +3130,7 @@ def upgrade_teams_app_of_user(self, user_id: str, app_name: str) -> dict | None: retries += 1 else: logger.error( - "Failed to upgrade M365 Teams app -> %s (%s) of M365 user -> %s; status -> %s; error -> %s", + "Failed to remove M365 Teams app -> '%s' (%s) from M365 user -> %s; status -> %s; error -> %s", app_name, app_installation_id, user_id, @@ -2788,8 +3160,9 @@ def assign_teams_app_to_team(self, team_id: str, app_id: str) -> dict | None: "teamsApp@odata.bind": self.config()["teamsAppsUrl"] + "/" + app_id } - logger.info( - "Assign M365 Teams app -> %s to M365 Team -> %s; calling -> %s", + logger.debug( + "Assign M365 Teams app -> '%s' (%s) to M365 Team -> %s; calling -> %s", + self.config()["teamsAppName"], app_id, team_id, request_url, @@ -2798,13 +3171,16 @@ def assign_teams_app_to_team(self, team_id: str, app_id: str) -> dict | None: retries = 0 while True: response = requests.post( - request_url, json=post_body, headers=request_header, timeout=60 + request_url, + json=post_body, + headers=request_header, + timeout=REQUEST_TIMEOUT, ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -2818,7 +3194,8 @@ def assign_teams_app_to_team(self, team_id: str, app_id: str) -> dict | None: retries += 1 else: logger.error( - "Failed to assign M365 Teams app -> %s to M365 Team -> %s; status -> %s; error -> %s", + "Failed to assign M365 Teams app -> '%s' (%s) to M365 Team -> %s; status -> %s; error -> %s", + self.config()["teamsAppName"], app_id, team_id, response.status_code, @@ -2848,7 +3225,7 @@ def upgrade_teams_app_of_team(self, team_id: str, app_name: str) -> dict | None: app_installation_id = self.get_result_value(response, "id", 0) if not app_installation_id: logger.error( - "M365 Teams app -> %s not found for M365 Team with ID -> %s. Cannot upgrade app for this team!", + "M365 Teams app -> '%s' not found for M365 Team with ID -> %s. 
Cannot upgrade app for this team!", app_name, team_id, ) @@ -2864,8 +3241,8 @@ def upgrade_teams_app_of_team(self, team_id: str, app_name: str) -> dict | None: ) request_header = self.request_header() - logger.info( - "Upgrade app -> %s (%s) of M365 team with ID -> %s; calling -> %s", + logger.debug( + "Upgrade app -> '%s' (%s) of M365 team with ID -> %s; calling -> %s", app_name, app_installation_id, team_id, @@ -2874,12 +3251,14 @@ def upgrade_teams_app_of_team(self, team_id: str, app_name: str) -> dict | None: retries = 0 while True: - response = requests.post(request_url, headers=request_header, timeout=60) + response = requests.post( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -2893,7 +3272,7 @@ def upgrade_teams_app_of_team(self, team_id: str, app_name: str) -> dict | None: retries += 1 else: logger.error( - "Failed to upgrade app -> %s (%s) of M365 team with ID -> %s; status -> %s; error -> %s", + "Failed to upgrade M365 Teams app -> '%s' (%s) of M365 team with ID -> %s; status -> %s; error -> %s", app_name, app_installation_id, team_id, @@ -2945,8 +3324,10 @@ def add_teams_app_to_channel( None, ) if not channel: - logger.erro( - "Cannot find Channel -> %s on M365 Team -> %s", channel_name, team_name + logger.error( + "Cannot find Channel -> '%s' on M365 Team -> '%s'", + channel_name, + team_name, ) return None channel_id = channel["id"] @@ -2974,8 +3355,8 @@ def add_teams_app_to_channel( }, } - logger.info( - "Add Tab -> %s with App ID -> %s to Channel -> %s of Microsoft 365 Team -> %s; calling -> %s", + logger.debug( + "Add Tab -> '%s' with App ID -> %s to Channel -> '%s' of Microsoft 365 Team -> '%s'; calling -> %s", tab_name, app_id, channel_name, @@ -2986,13 +3367,16 @@ def add_teams_app_to_channel( retries = 0 while True: response = requests.post( - request_url, headers=request_header, json=tab_config, timeout=60 + request_url, + headers=request_header, + json=tab_config, + timeout=REQUEST_TIMEOUT, ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -3006,7 +3390,7 @@ def add_teams_app_to_channel( retries += 1 else: logger.error( - "Failed to add Tab for M365 Team -> %s (%s) and Channel -> %s (%s); status -> %s; error -> %s; tab config -> %s", + "Failed to add Tab for M365 Team -> '%s' (%s) and Channel -> '%s' (%s); status -> %s; error -> %s; tab config -> %s", team_name, team_id, channel_name, @@ -3058,8 +3442,10 @@ def update_teams_app_of_channel( None, ) if not channel: - logger.erro( - "Cannot find Channel -> %s for M365 Team -> %s", channel_name, team_name + logger.error( + "Cannot find Channel -> '%s' for M365 Team -> '%s'", + channel_name, + team_name, ) return None channel_id = channel["id"] @@ -3075,8 +3461,8 @@ def update_teams_app_of_channel( None, ) if not tab: - logger.erro( - "Cannot find 
Tab -> %s on M365 Team -> %s (%s) and Channel -> %s (%s)", + logger.error( + "Cannot find Tab -> '%s' on M365 Team -> '%s' (%s) and Channel -> '%s' (%s)", tab_name, team_name, team_id, @@ -3108,8 +3494,8 @@ def update_teams_app_of_channel( }, } - logger.info( - "Update Tab -> %s (%s) of Channel -> %s (%s) for Microsoft 365 Teams -> %s (%s) with configuration -> %s; calling -> %s", + logger.debug( + "Update Tab -> '%s' (%s) of Channel -> '%s' (%s) for Microsoft 365 Teams -> '%s' (%s) with configuration -> %s; calling -> %s", tab_name, tab_id, channel_name, @@ -3123,13 +3509,16 @@ def update_teams_app_of_channel( retries = 0 while True: response = requests.patch( - request_url, headers=request_header, json=tab_config, timeout=60 + request_url, + headers=request_header, + json=tab_config, + timeout=REQUEST_TIMEOUT, ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -3143,7 +3532,7 @@ def update_teams_app_of_channel( retries += 1 else: logger.error( - "Failed to update Tab -> %s (%s) for M365 Team -> %s (%s) and Channel -> %s (%s); status -> %s; error -> %s", + "Failed to update Tab -> '%s' (%s) for M365 Team -> '%s' (%s) and Channel -> '%s' (%s); status -> %s; error -> %s", tab_name, tab_id, team_name, @@ -3189,8 +3578,10 @@ def delete_teams_app_from_channel( None, ) if not channel: - logger.erro( - "Cannot find Channel -> %s for M365 Team -> %s", channel_name, team_name + logger.error( + "Cannot find Channel -> '%s' for M365 Team -> '%s'", + channel_name, + team_name, ) return False channel_id = channel["id"] @@ -3206,8 +3597,8 @@ def delete_teams_app_from_channel( item for item in response["value"] if item["displayName"] == tab_name ] if not tab_list: - logger.erro( - "Cannot find Tabs with name -> %s on M365 Team -> %s (%s) and Channel -> %s (%s)", + logger.error( + "Cannot find Tab -> '%s' on M365 Team -> '%s' (%s) and Channel -> '%s' (%s)", tab_name, team_name, team_id, @@ -3231,8 +3622,8 @@ def delete_teams_app_from_channel( request_header = self.request_header() - logger.info( - "Delete Tab -> %s (%s) from Channel -> %s (%s) of Microsoft 365 Teams -> %s (%s); calling -> %s", + logger.debug( + "Delete Tab -> '%s' (%s) from Channel -> '%s' (%s) of Microsoft 365 Teams -> '%s' (%s); calling -> %s", tab_name, tab_id, channel_name, @@ -3245,11 +3636,11 @@ def delete_teams_app_from_channel( retries = 0 while True: response = requests.delete( - request_url, headers=request_header, timeout=60 + request_url, headers=request_header, timeout=REQUEST_TIMEOUT ) if response.ok: - logger.info( - "Tab -> %s (%s) has been deleted from Channel -> %s (%s) of Microsoft 365 Teams -> %s (%s)", + logger.debug( + "Tab -> '%s' (%s) has been deleted from Channel -> '%s' (%s) of Microsoft 365 Teams -> '%s' (%s)", tab_name, tab_id, channel_name, @@ -3260,7 +3651,7 @@ def delete_teams_app_from_channel( break # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -3274,7 
+3665,7 @@ def delete_teams_app_from_channel( retries += 1 else: logger.error( - "Failed to delete Tab -> %s (%s) for M365 Team -> %s (%s) and Channel -> %s (%s); status -> %s; error -> %s", + "Failed to delete Tab -> '%s' (%s) for M365 Team -> '%s' (%s) and Channel -> '%s' (%s); status -> %s; error -> %s", tab_name, tab_id, team_name, @@ -3334,22 +3725,25 @@ def add_sensitivity_label( request_url = self.config()["securityUrl"] + "/sensitivityLabels" request_header = self.request_header() - logger.info( - "Create M365 sensitivity label -> %s; calling -> %s", name, request_url + logger.debug( + "Create M365 sensitivity label -> '%s'; calling -> %s", name, request_url ) # Send the POST request to create the label response = requests.post( - request_url, headers=request_header, data=json.dumps(payload), timeout=60 + request_url, + headers=request_header, + data=json.dumps(payload), + timeout=REQUEST_TIMEOUT, ) # Check the response status code if response.status_code == 201: - logger.info("Label -> %s has been created successfully!", name) + logger.debug("Label -> '%s' has been created successfully!", name) return response else: logger.error( - "Failed to create the M365 label -> %s! Response status code -> %s", + "Failed to create the M365 label -> '%s'! Response status code -> %s", name, response.status_code, ) @@ -3377,8 +3771,8 @@ def assign_sensitivity_label_to_user(self, user_email: str, label_name: str): ) request_header = self.request_header() - logger.info( - "Assign label -> %s to user -> %s; calling -> %s", + logger.debug( + "Assign label -> '%s' to user -> '%s'; calling -> %s", label_name, user_email, request_url, @@ -3387,13 +3781,13 @@ def assign_sensitivity_label_to_user(self, user_email: str, label_name: str): retries = 0 while True: response = requests.post( - request_url, headers=request_header, json=body, timeout=60 + request_url, headers=request_header, json=body, timeout=REQUEST_TIMEOUT ) if response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -3407,7 +3801,7 @@ def assign_sensitivity_label_to_user(self, user_email: str, label_name: str): retries += 1 else: logger.error( - "Failed to assign label -> %s to M365 user -> %s; status -> %s; error -> %s", + "Failed to assign label -> '%s' to M365 user -> '%s'; status -> %s; error -> %s", label_name, user_email, response.status_code, @@ -3438,7 +3832,7 @@ def upload_outlook_app( # request_header = self.request_header() - logger.info("Install Outlook Add-in from %s (NOT IMPLEMENTED)", app_path) + logger.debug("Install Outlook Add-in from '%s' (NOT IMPLEMENTED)", app_path) response = None @@ -3464,20 +3858,22 @@ def get_app_registration( ] + "?$filter=displayName eq '{}'".format(app_registration_name) request_header = self.request_header() - logger.info( - "Get Azure App Registration -> %s; calling -> %s", + logger.debug( + "Get Azure App Registration -> '%s'; calling -> %s", app_registration_name, request_url, ) retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=60) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) # Check if Session has 
expired - then re-authenticate and try once more
            elif response.status_code == 401 and retries == 0:
-                logger.warning("Session has expired - try to re-authenticate...")
+                logger.debug("Session has expired - try to re-authenticate...")
                self.authenticate(revalidate=True)
                request_header = self.request_header()
                retries += 1
@@ -3491,7 +3887,7 @@ def get_app_registration(
                retries += 1
            else:
                logger.error(
-                    "Cannot find Azure App Registration -> %s; status -> %s; error -> %s",
+                    "Cannot find Azure App Registration -> '%s'; status -> %s; error -> %s",
                    app_registration_name,
                    response.status_code,
                    response.text,
@@ -3573,13 +3969,13 @@ def add_app_registration(
                request_url,
                headers=request_header,
                json=app_registration_data,
-                timeout=60,
+                timeout=REQUEST_TIMEOUT,
            )
            if response.ok:
                return self.parse_request_response(response)
            # Check if Session has expired - then re-authenticate and try once more
            elif response.status_code == 401 and retries == 0:
-                logger.warning("Session has expired - try to re-authenticate...")
+                logger.debug("Session has expired - try to re-authenticate...")
                self.authenticate(revalidate=True)
                request_header = self.request_header()
                retries += 1
@@ -3593,7 +3989,7 @@ def add_app_registration(
                retries += 1
            else:
                logger.error(
-                    "Cannot add App Registration -> %s; status -> %s; error -> %s",
+                    "Cannot add App Registration -> '%s'; status -> %s; error -> %s",
                    app_registration_name,
                    response.status_code,
                    response.text,
@@ -3632,8 +4028,8 @@ def update_app_registration(
        request_url = self.config()["applicationsUrl"] + "/" + app_registration_id
        request_header = self.request_header()
-        logger.info(
-            "Update App Registration -> %s (%s); calling -> %s",
+        logger.debug(
+            "Update App Registration -> '%s' (%s); calling -> %s",
            app_registration_name,
            app_registration_id,
            request_url,
@@ -3645,13 +4041,13 @@ def update_app_registration(
                request_url,
                headers=request_header,
                json=app_registration_data,
-                timeout=60,
+                timeout=REQUEST_TIMEOUT,
            )
            if response.ok:
                return self.parse_request_response(response)
            # Check if Session has expired - then re-authenticate and try once more
            elif response.status_code == 401 and retries == 0:
-                logger.warning("Session has expired - try to re-authenticate...")
+                logger.debug("Session has expired - try to re-authenticate...")
                self.authenticate(revalidate=True)
                request_header = self.request_header()
                retries += 1
@@ -3665,7 +4061,7 @@ def update_app_registration(
                retries += 1
            else:
                logger.error(
-                    "Cannot update App Registration -> %s (%s); status -> %s; error -> %s",
+                    "Cannot update App Registration -> '%s' (%s); status -> %s; error -> %s",
                    app_registration_name,
                    app_registration_id,
                    response.status_code,
@@ -3674,3 +4070,522 @@ def update_app_registration(
        return None
    # end method definition
+
+    def get_mail(
+        self,
+        user_id: str,
+        sender: str,
+        subject: str,
+        num_emails: int | None = None,
+        show_error: bool = False,
+    ) -> dict | None:
+        """Get email from inbox of a given user and a given sender (from).
+        This requires Mail.Read Application permissions for the Azure App being used.
+
+        Args:
+            user_id (str): M365 ID of the user
+            sender (str): sender email address to filter for
+            subject (str): (sub)string the email subject needs to contain
+            num_emails (int, optional): number of matching emails to retrieve
+            show_error (bool): whether or not an error should be displayed if the
+                               emails cannot be retrieved.
+        Returns:
+            dict: Email or None if the request fails.
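+
+        Example (a sketch; assumes an authenticated M365 instance `m365`,
+        IDs and addresses are hypothetical):
+            >>> mails = m365.get_mail(
+            ...     user_id="<m365-user-guid>",
+            ...     sender="noreply@example.com",
+            ...     subject="verify your email",
+            ...     num_emails=10,
+            ... )
+            >>> if mails and mails["value"]:
+            ...     print(mails["value"][0]["id"])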
+        """
+
+        # Attention: you can easily run into limitations of the MS Graph API. If selection + filtering
+        # is too complex you can get this error: "The restriction or sort order is too complex for this operation."
+        # that's why we first just do the ordering and then do the filtering on sender and subject
+        # separately
+        request_url = (
+            self.config()["usersUrl"]
+            + "/"
+            + user_id
+            # + "/messages?$filter=from/emailAddress/address eq '{}' and contains(subject, '{}')&$orderby=receivedDateTime desc".format(
+            + "/messages?$orderby=receivedDateTime desc"
+        )
+        if num_emails:
+            request_url += "&$top={}".format(num_emails)
+
+        request_header = self.request_header()
+
+        logger.debug(
+            "Retrieve mails for user -> %s from -> '%s' with subject -> '%s'; calling -> %s",
+            user_id,
+            sender,
+            subject,
+            request_url,
+        )
+
+        retries = 0
+        while True:
+            response = requests.get(
+                request_url, headers=request_header, timeout=REQUEST_TIMEOUT
+            )
+            if response.ok:
+                response = self.parse_request_response(response)
+                messages = response["value"] if response else []
+
+                # Filter the messages by sender and subject in code
+                filtered_messages = [
+                    msg
+                    for msg in messages
+                    if msg.get("from", {}).get("emailAddress", {}).get("address")
+                    == sender
+                    and subject in msg.get("subject", "")
+                ]
+                response["value"] = filtered_messages
+                return response
+
+            # Check if Session has expired - then re-authenticate and try once more
+            elif response.status_code == 401 and retries == 0:
+                logger.debug("Session has expired - try to re-authenticate...")
+                self.authenticate(revalidate=True)
+                request_header = self.request_header()
+                retries += 1
+            elif response.status_code in [502, 503, 504] and retries < 3:
+                logger.warning(
+                    "M365 Graph API delivered server side error -> %s; retrying in %s seconds...",
+                    response.status_code,
+                    (retries + 1) * 60,
+                )
+                time.sleep((retries + 1) * 60)
+                retries += 1
+            else:
+                if show_error:
+                    logger.error(
+                        "Cannot retrieve emails for user -> %s; status -> %s; error -> %s",
+                        user_id,
+                        response.status_code,
+                        response.text,
+                    )
+                else:
+                    logger.warning(
+                        "Cannot retrieve emails for user -> %s; status -> %s; warning -> %s",
+                        user_id,
+                        response.status_code,
+                        response.text,
+                    )
+                return None
+
+    # end method definition
+
+    def get_mail_body(self, user_id: str, email_id: str) -> str | None:
+        """Get full email body for a given email ID.
+        This requires Mail.Read Application permissions for the Azure App being used.
+
+        Args:
+            user_id (str): M365 ID of the user
+            email_id (str): M365 ID of the email
+        Returns:
+            str: Email body or None if the request fails.
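+
+        Example (a sketch; assumes an authenticated M365 instance `m365`,
+        IDs are hypothetical):
+            >>> body = m365.get_mail_body(
+            ...     user_id="<m365-user-guid>", email_id="<message-id>"
+            ... )
+            >>> print(body[:80] if body else "no body")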
+        """
+
+        request_url = (
+            self.config()["usersUrl"]
+            + "/"
+            + user_id
+            + "/messages/"
+            + email_id
+            + "/$value"
+        )
+
+        request_header = self.request_header()
+
+        retries = 0
+        while True:
+            response = requests.get(
+                request_url, headers=request_header, timeout=REQUEST_TIMEOUT
+            )
+            if response.ok:
+                return response.content.decode("utf-8")
+            # Check if Session has expired - then re-authenticate and try once more
+            elif response.status_code == 401 and retries == 0:
+                logger.debug("Session has expired - try to re-authenticate...")
+                self.authenticate(revalidate=True)
+                request_header = self.request_header()
+                retries += 1
+            elif response.status_code in [502, 503, 504] and retries < 3:
+                logger.warning(
+                    "M365 Graph API delivered server side error -> %s; retrying in %s seconds...",
+                    response.status_code,
+                    (retries + 1) * 60,
+                )
+                time.sleep((retries + 1) * 60)
+                retries += 1
+            else:
+                logger.error(
+                    "Cannot retrieve email body for user -> %s and email -> %s; status -> %s; error -> %s",
+                    user_id,
+                    email_id,
+                    response.status_code,
+                    response.text,
+                )
+                return None
+
+    # end method definition
+
+    def extract_url_from_message_body(
+        self,
+        message_body: str,
+        search_pattern: str,
+        multi_line: bool = False,
+        multi_line_end_marker: str = "%3D",
+        line_end_marker: str = "=",
+        replacements: list | None = None,
+    ) -> str | None:
+        """Parse the email body to extract a (potentially multi-line) URL from the body.
+
+        Args:
+            message_body (str): Text of the Email body
+            search_pattern (str): Pattern that needs to be in the first line of the URL. This
+                                  makes sure it is the right URL we are looking for.
+            multi_line (bool, optional): Is the URL spread over multiple lines? Defaults to False.
+            multi_line_end_marker (str, optional): If it is a multi-line URL, what marks the end
+                                                   of the URL in the last line? Defaults to "%3D".
+            line_end_marker (str, optional): What marks the end of lines 1-(n-1)? Defaults to "=".
+            replacements (list, optional): List of dicts with "from" and "to" keys that are
+                                           applied to each URL line. Defaults to None.
+        Returns:
+            str: URL text that has been extracted, or None if no URL was found.
+        """
+
+        if not message_body:
+            return None
+
+        # Make sure the replacement loops below don't iterate over None:
+        if replacements is None:
+            replacements = []
+
+        # Split all the lines after a CRLF:
+        lines = [line.strip() for line in message_body.split("\r\n")]
+
+        # Filter out the complete URL from the extracted URLs
+        found = False
+
+        url = ""
+
+        for line in lines:
+            if found:
+                # Remove line end marker - many times a "="
+                if line.endswith(line_end_marker):
+                    line = line[:-1]
+                for replacement in replacements:
+                    line = line.replace(replacement["from"], replacement["to"])
+                # We consider an empty line after we found the URL to indicate the end of the URL:
+                if line == "":
+                    break
+                url += line
+                if multi_line and line.endswith(multi_line_end_marker):
+                    break
+            if search_pattern not in line:
+                continue
+            # Find https:// in the current line:
+            index = line.find("https://")
+            if index == -1:
+                continue
+            # If there's any text in front of https in that line cut it:
+            line = line[index:]
+            # Remove line end marker - many times a "="
+            if line.endswith(line_end_marker):
+                line = line[:-1]
+            for replacement in replacements:
+                line = line.replace(replacement["from"], replacement["to"])
+            found = True
+            url += line
+            if not multi_line:
+                break
+
+        return url
+
+    # end method definition
+
+    def delete_mail(self, user_id: str, email_id: str) -> dict | None:
+        """Delete email from inbox of a given user and a given email ID.
+        This requires Mail.ReadWrite Application permissions for the Azure App being used.
+
+        Args:
+            user_id (str): M365 ID of the user
+            email_id (str): M365 ID of the email
+        Returns:
+            dict: Response of the delete request or None if the request fails.
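+
+        Example (a sketch; assumes an authenticated M365 instance `m365`,
+        IDs are hypothetical):
+            >>> response = m365.delete_mail(
+            ...     user_id="<m365-user-guid>", email_id="<message-id>"
+            ... )
+            >>> if not response:
+            ...     print("Could not delete the email!")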
+        """
+
+        request_url = (
+            self.config()["usersUrl"] + "/" + user_id + "/messages/" + email_id
+        )
+
+        request_header = self.request_header()
+
+        retries = 0
+        while True:
+            response = requests.delete(
+                request_url, headers=request_header, timeout=REQUEST_TIMEOUT
+            )
+            if response.ok:
+                return self.parse_request_response(response)
+            # Check if Session has expired - then re-authenticate and try once more
+            elif response.status_code == 401 and retries == 0:
+                logger.debug("Session has expired - try to re-authenticate...")
+                self.authenticate(revalidate=True)
+                request_header = self.request_header()
+                retries += 1
+            elif response.status_code in [502, 503, 504] and retries < 3:
+                logger.warning(
+                    "M365 Graph API delivered server side error -> %s; retrying in %s seconds...",
+                    response.status_code,
+                    (retries + 1) * 60,
+                )
+                time.sleep((retries + 1) * 60)
+                retries += 1
+            else:
+                logger.error(
+                    "Cannot delete email -> %s from inbox of user -> %s; status -> %s; error -> %s",
+                    email_id,
+                    user_id,
+                    response.status_code,
+                    response.text,
+                )
+                return None
+
+    # end method definition
+
+    def email_verification(
+        self,
+        user_email: str,
+        sender: str,
+        subject: str,
+        url_search_pattern: str,
+        line_end_marker: str = "=",
+        multi_line: bool = True,
+        multi_line_end_marker: str = "%3D",
+        replacements: list | None = None,
+        max_retries: int = 6,
+        use_browser_automation: bool = False,
+        password: str = "",
+        password_field_id: str = "",
+        password_confirmation_field_id: str = "",
+        password_submit_xpath: str = "",
+        terms_of_service_xpath: str = "",
+    ) -> bool:
+        """Process email verification.
+
+        Args:
+            user_email (str): Email address of the user receiving the verification mail.
+            sender (str): Email sender (address)
+            subject (str): Email subject to look for (can be substring)
+            url_search_pattern (str): String the URL needs to contain to identify it.
+            line_end_marker (str, optional): Marker for the end of lines 1-(n-1) of the URL. Defaults to "=".
+            multi_line (bool, optional): Whether the URL spans multiple lines. Defaults to True.
+            multi_line_end_marker (str): If the URL spans multiple lines this is the "end" marker for the last line.
+            replacements (list): If the URL needs some treatment these replacements can be applied.
+            max_retries (int, optional): How often to check for the verification mail. Defaults to 6.
+            use_browser_automation (bool, optional): Whether the URL needs to be opened with a full browser. Defaults to False.
+            password (str, optional): Password to enter if the verification page requests a password change.
+            password_field_id (str, optional): Element ID of the password field.
+            password_confirmation_field_id (str, optional): Element ID of the password confirmation field.
+            password_submit_xpath (str, optional): XPath of the submit button of the password dialog.
+            terms_of_service_xpath (str, optional): XPath of the terms of service accept button.
+        Returns:
+            bool: True = Success, False = Failure
+        """
+
+        # Determine the M365 user for the current user by
+        # the email address:
+        m365_user = self.get_user(user_email=user_email)
+        m365_user_id = self.get_result_value(m365_user, "id")
+        if not m365_user_id:
+            logger.warning("Cannot find M365 user -> %s", user_email)
+            return False
+
+        if replacements is None:
+            replacements = [{"from": "=3D", "to": "="}]
+
+        retries = 0
+        while retries < max_retries:
+            response = self.get_mail(
+                user_id=m365_user_id,
+                sender=sender,
+                subject=subject,
+                show_error=False,
+            )
+            if response and response["value"]:
+                emails = response["value"]
+                # potentially there may be multiple matching emails,
+                # we want the most recent one (from today):
+                latest_email = max(emails, key=lambda x: x["receivedDateTime"])
+                # Extract just the date:
+                latest_email_date = latest_email["receivedDateTime"].split("T")[0]
+                # Get the current date (today):
+                today_date = datetime.today().strftime("%Y-%m-%d")
+                # We do a sanity check here: the verification mail should be from today,
+                # otherwise we assume it is an old mail and we still need to wait for the
+                # new verification mail to arrive:
+                if latest_email_date != today_date:
+                    logger.info(
+                        "Verification email not yet received (latest mail from -> %s). Waiting %s seconds...",
+                        latest_email_date,
+                        10 * (retries + 1),
+                    )
+                    time.sleep(10 * (retries + 1))
+                    retries += 1
+                    continue
+                email_id = latest_email["id"]
+                # The full email body needs to be loaded with a separate REST call:
+                body_text = self.get_mail_body(user_id=m365_user_id, email_id=email_id)
+                # Extract the verification URL:
+                if body_text:
+                    url = self.extract_url_from_message_body(
+                        message_body=body_text,
+                        search_pattern=url_search_pattern,
+                        line_end_marker=line_end_marker,
+                        multi_line=multi_line,
+                        multi_line_end_marker=multi_line_end_marker,
+                        replacements=replacements,
+                    )
+                else:
+                    url = ""
+                if not url:
+                    logger.warning("Cannot find verification link in the email body!")
+                    return False
+                # Simulate a "click" on this URL:
+                if use_browser_automation:
+                    # Core Share needs a full browser:
+                    browser_automation_object = BrowserAutomation(
+                        take_screenshots=True,
+                        automation_name="email-verification",
+                    )
+                    logger.info(
+                        "Open URL -> %s to verify account or email change (using browser automation)",
+                        url,
+                    )
+                    success = browser_automation_object.get_page(url)
+                    if success:
+                        user_interaction_required = False
+                        logger.info(
+                            "Successfully opened URL. Browser title is -> '%s'.",
+                            browser_automation_object.get_title(),
+                        )
+                        if password_field_id:
+                            password_field = browser_automation_object.find_elem(
+                                find_elem=password_field_id, show_error=False
+                            )
+                            if password_field:
+                                # The subsequent processing is only required if
+                                # the returned page requests a password change:
+                                user_interaction_required = True
+                                logger.info(
+                                    "Found password field on returned page - it seems email verification requests password entry!"
+                                )
+                                result = browser_automation_object.find_elem_and_set(
+                                    find_elem=password_field_id,
+                                    elem_value=password,
+                                    is_sensitive=True,
+                                )
+                                if not result:
+                                    logger.error(
+                                        "Failed to enter password in field -> '%s'",
+                                        password_field_id,
+                                    )
+                                    success = False
+                            else:
+                                logger.info(
+                                    "No user interaction required (no password change or terms of service acceptance)."
+                                )
+                        if user_interaction_required and password_confirmation_field_id:
+                            password_confirm_field = (
+                                browser_automation_object.find_elem(
+                                    find_elem=password_confirmation_field_id,
+                                    show_error=False,
+                                )
+                            )
+                            if password_confirm_field:
+                                logger.info(
+                                    "Found password confirmation field on returned page - it seems email verification requests consecutive password entry!"
+                                )
+                                result = browser_automation_object.find_elem_and_set(
+                                    find_elem=password_confirmation_field_id,
+                                    elem_value=password,
+                                    is_sensitive=True,
+                                )
+                                if not result:
+                                    logger.error(
+                                        "Failed to enter password in field -> '%s'",
+                                        password_confirmation_field_id,
+                                    )
+                                    success = False
+                        if user_interaction_required and password_submit_xpath:
+                            password_submit_button = (
+                                browser_automation_object.find_elem(
+                                    find_elem=password_submit_xpath,
+                                    find_method="xpath",
+                                    show_error=False,
+                                )
+                            )
+                            if password_submit_button:
+                                logger.info(
+                                    "Submit password change dialog with button -> '%s' (found with XPath -> %s)",
+                                    password_submit_button.text,
+                                    password_submit_xpath,
+                                )
+                                result = browser_automation_object.find_elem_and_click(
+                                    find_elem=password_submit_xpath, find_method="xpath"
+                                )
+                                if not result:
+                                    logger.error(
+                                        "Failed to press submit button -> %s",
+                                        password_submit_xpath,
+                                    )
+                                    success = False
+                                # TODO is this sleep required? The Terms of service dialog has some weird animation
+                                # which may require this. It seems it is required!
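+                                # Give the dialog a moment, then look for an (optional)
+                                # Terms of Service accept button below; if it is not
+                                # found we assume no acceptance is required: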
+ time.sleep(1) + terms_accept_button = browser_automation_object.find_elem( + find_elem=terms_of_service_xpath, + find_method="xpath", + show_error=False, + ) + if terms_accept_button: + logger.info( + "Accept terms of service with button -> '%s' (found with XPath -> %s)", + terms_accept_button.text, + terms_of_service_xpath, + ) + result = browser_automation_object.find_elem_and_click( + find_elem=terms_of_service_xpath, + find_method="xpath", + ) + if not result: + logger.error( + "Failed to accept terms of service with button -> '%s'", + terms_accept_button.text, + ) + success = False + else: + logger.info("No Terms of Service acceptance required.") + # end if use_browser_automation + else: + # Salesforce (other than Core Share) is OK with the simple HTTP GET request: + logger.info("Open URL -> %s to verify account or email change", url) + response = self._http_object.http_request(url=url, method="GET") + success = response and response.ok + + if success: + logger.info("Remove email from inbox of user -> %s...", user_email) + response = self.delete_mail(user_id=m365_user_id, email_id=email_id) + if not response: + logger.warning( + "Couldn't remove the mail from the inbox of user -> %s", + user_email, + ) + # We have success now and can break from the while loop + return True + else: + logger.error( + "Failed to process e-mail verification for user -> %s", + user_email, + ) + return False + # end if response and response["value"] + else: + logger.info( + "Verification email not yet received (no mails with sender -> %s and subject -> '%s' found). Waiting %s seconds...", + sender, + subject, + 10 * (retries + 1), + ) + time.sleep(10 * (retries + 1)) + retries += 1 + # end while + + logger.warning( + "Verification mail for user -> %s has not arrived in time.", user_email + ) + + return False + + # end method definition diff --git a/pyxecm/customizer/payload.py b/pyxecm/customizer/payload.py index 7c45b65..799d1dc 100644 --- a/pyxecm/customizer/payload.py +++ b/pyxecm/customizer/payload.py @@ -8,20 +8,24 @@ * OTDS licenses * Extended ECM users and groups * Microsoft 365 user, groups, and teams +* Salesforce users and groups +* SuccessFactors users +* Core Share users and groups * Extended ECM Admin Settings (LLConfig) * Extended ECM External System Connections (SAP, SuccessFactors, ...) * Extended ECM Transport Packages (scenarios and demo content) * Extended ECM CS Applications (typically based on Web Reports) * Extended ECM Web Reports to run * Extended ECM Workspaces to create (incl. members, workspace relationships) -* Extended ECM user photos, user favorites and user settings -* Extended ECM items to create and permissions to apply -* Extended ECM items to rename +* Extended ECM User photos, user favorites and user settings +* Extended ECM Items to create and permissions to apply +* Extended ECM Items to rename * Extended ECM Documents to generate (from templates) -* Extended ECM assignments (used e.g. for Government scenario) +* Extended ECM Assignments (used e.g. for Government scenario) * Extended ECM Records Management settings, Security Clearance, Supplemental Markings, and Holds * SAP RFCs (Remote Function Calls) * Commands to execute in Kubernetes Pods +* Browser Automations (for things that cannot be automated via an API) This code typically runs in a container as part of the cloud automation. 
@@ -39,11 +43,25 @@ write_status_file: Write a status file into the Admin Personal Workspace in Extended ECM
                   to indicate that the payload section has been deployed successfully
 get_status_file: Retrieve the content of the status file
+get_payload: return the payload data structure
+get_users: return list of users
+get_groups: return list of groups
+get_workspaces: return list of workspaces
+get_otcs_frontend: return OTCS object for OTCS frontend
+get_otcs_backend: return OTCS object for OTCS backend
+get_otds: return OTDS object
+get_k8s: return the Kubernetes object
+get_m365: return the M365 object
+generate_password: Generate random passwords with a given specification
-determine_group_id: determine the id of a group - either from payload or from OTCS
+determine_group_id: determine the id of an Extended ECM group - either from payload or from OTCS
+determine_group_id_m365: determine the id of a M365 group - either from payload or from M365 via Graph API
+determine_group_core_share: determine the id of a Core Share group - either from payload or from Core Share directly
 determine_user_id: determine the id of a user - either from payload or from OTCS
 determine_user_id_m365: determine the id of a M365 user - either from payload or from M365 via Graph API
+determine_user_id_core_share: Determine the ID of a Core Share user - either from payload or from Core Share directly
 determine_workspace_id: determine the nodeID of a workspace - either from payload or from OTCS
+determine_workspace_type_and_template_id: determine the IDs of type and template based on the provided names
 process_payload: process payload (main method)
 process_web_hooks: process list of web hooks
@@ -54,13 +72,22 @@ process_auth_handlers: process the OTDS auth handlers (some are implicitly
                       generated by external system payload)
 process_trusted_sites: process the OTDS trusted sites
 process_system_attributes: process the OTDS system attributes
+
 process_group_placeholders: collect group placeholders
 process_user_placeholders: collect user placeholders
+
 process_groups: process Extended ECM user groups
 process_groups_m365: process M365 user groups
+process_groups_salesforce: process Salesforce user groups
+process_groups_core_share: process Core Share user groups
+
 process_users: process Extended ECM users
 process_users_m365: process M365 users
 process_users_sap: process users that are SAP enabled (if SAP is enabled)
+process_users_successfactors: process users that are SuccessFactors enabled (if SuccessFactors is enabled)
+process_users_salesforce: Process users in payload and sync them with Salesforce (if Salesforce is enabled)
+process_users_core_share: Process users in payload and sync them with Core Share (if Core Share is enabled)
+
 process_teams_m365: process groups in payload and create matching M365 Teams
 cleanup_stale_teams_m365: Delete Microsoft Teams that are left-overs from former deployments.
                          This method is currently not used.
@@ -70,10 +97,15 @@ process_transport_packages: process Extended ECM transport packages
 process_user_photos: process Extended ECM user photos (user profile)
 process_user_photos_m365: process user photos in payload and assign them to Microsoft 365 users.
+process_user_photos_salesforce: process user photos in payload and assign them to Salesforce users.
+
 process_business_object_types: process Extended ECM business object types (needs to run after process_transport_packages)
+
 process_workspace_types: process Extended ECM workspace types (needs to run after process_transport_packages)
+prepare_workspace_create_form: Prepare the category structure for the workspace creation.
+prepare_workspace_business_objects: Prepare the business object data for the workspace creation.
 process_workspaces: process Extended ECM workspace instances
 process_workspace_relationships: process Extended ECM workspace relationships
 process_workspace_members: process Extended ECM workspace members (users and groups)
@@ -101,16 +133,27 @@ process_browser_automations: process Selenium-based browser automation payload
 init_sap: initalize SAP object for RFC communication
 process_sap_rfcs: process SAP Remote Function Calls (RFC) to trigger automation in SAP S/4HANA
+init_successfactors: initialize SuccessFactors object for user updates in SuccessFactors
 init_salesforce: initialize Salesforce object for Salesforce API communication
-get_payload: return the payload data structure
-get_users: return list of users
-get_groups: return list of groups
-get_workspaces: return list of workspaces
-get_otcs_frontend: return OTCS object for OTCS frontend
-get_otcs_backend: return OTCS object for OTCS backend
-get_otds: return OTDS object
-get_k8s: return the Kubernetes object
+process_bulk_datasource: Process a datasource and load it into a Data object
+process_bulk_workspaces: Process workspaces in payload and bulk create them in Extended ECM (multi-threaded)
+process_bulk_categories: Helper method to replace the value placeholders in the bulk category structures
+                         with values from the Pandas Series (row)
+process_bulk_workspaces_worker: This is the thread worker to create workspaces in bulk
+lookup_data_source_value: Lookup a value in a given data source (specified by payload dict).
+                          If the data source has not been loaded before then load the data source.
+process_bulk_workspaces_synonym_lookup: Use a datasource to lookup the workspace name (or all fields) and ID using a given synonym.
+process_bulk_workspaces_lookup: Use a combination of workspace name, workspace type, and workspace datasource
+                                (using synonyms) to lookup the workspace name and ID
+process_bulk_workspace_relationships: Process workspace relationships in payload and bulk create them in Extended ECM (multi-threaded)
+process_bulk_workspace_relationships_worker: This is the thread worker to create workspace relationships in bulk.
+prepare_category_data: Prepare the category information for a new or updated item (document or workspace)
+process_bulk_documents: Process bulkDocuments in payload and bulk create them in Extended ECM (multi-threaded)
+process_bulk_documents_worker: This is the thread worker to download + create documents in bulk.
+
+replace_bulk_placeholders: Replace placeholders like "{variable.subvalue}" in payload values
+cleanup_value: Cleanup field values based on regular expressions
+evaluate_conditions: Evaluate given conditions for a DataFrame series (i.e. a row).
""" @@ -127,7 +170,12 @@ import re import string from typing import Callable -from urllib.parse import urlparse +from urllib.parse import urlparse, parse_qs +import threading +import traceback +import copy +import time +import fnmatch import base64 import gzip @@ -135,16 +183,38 @@ import hcl2.api # OpenText specific modules: -from pyxecm import OTAC, OTCS, OTDS, OTIV +from pyxecm import OTAC, OTCS, OTDS, OTIV, OTMM, CoreShare from pyxecm.customizer.k8s import K8s from pyxecm.customizer.m365 import M365 from pyxecm.customizer.sap import SAP +from pyxecm.customizer.successfactors import SuccessFactors from pyxecm.customizer.salesforce import Salesforce +from pyxecm.customizer.servicenow import ServiceNow +from pyxecm.customizer.pht import PHT from pyxecm.customizer.browser_automation import BrowserAutomation +from pyxecm.helper.data import Data from pyxecm.helper.web import HTTP +from pyxecm.helper.xml import XML logger = logging.getLogger("pyxecm.customizer.payload") +try: + import pandas as pd + + pandas_installed = True +except ModuleNotFoundError as module_exception: + logger.warning( + "Module pandas is not installed. Customizer will not support bulk workspace creation." + ) + pandas_installed = False + +THREAD_NUMBER = 3 +BULK_THREAD_NUMBER = int(os.environ.get("BULK_THREAD_NUMBER", 1)) +BULK_DOCUMENT_PATH = "/tmp/bulkDocuments/" +ENABLE_MULTI_THREADING = pandas_installed and THREAD_NUMBER > 1 + +data_load_lock = threading.Lock() + class Payload: """Used to process Terrarium payload.""" @@ -158,11 +228,17 @@ class Payload: _otac: OTAC | None _otds: OTDS _otiv: OTIV | None + _otmm: OTMM | None + _pht: PHT | None + _otcs_source: OTCS | None _k8s: K8s | None - _web: HTTP | None + _http_object: HTTP | None _m365: M365 | None + _core_share: CoreShare | None _sap: SAP | None + _successfactors: SuccessFactors | None _salesforce: Salesforce | None + _servicenow: ServiceNow | None _browser_automation: BrowserAutomation | None _custom_settings_dir = "" @@ -178,164 +254,275 @@ class Payload: # - name (string) # - enabled (bool) # - restart (bool) + # See below and method init_payload() for details of existing payload sections _payload_sections = [] # # Initialize payload section variables. They are all list of dicts: # - # webhooks and webhooks_post: List of webHooks. List items are dicts with these key: - # - enabled (bool) - # - description (str) - # - url (str) + # _webhooks and webhooks_post: List of webHooks. Each element + # is a dict with these keys: + # - enabled (bool, optional, default = True) + # - description (str, optional) + # - url (str, mandatory) # - method (str) - either POST, PUT, GET - # - payload (dict) - # - headers (dict) + # - payload (dict, optional, default = {}) + # - headers (dict, optional, default = {}) _webhooks = [] _webhooks_post = [] - # partitions: list of dicts with these key / value pairs: - # - enabled (bool) - # - name (str) - # - description (str) - # - synced (bool) - # - access_role (str) - # - licenses (list) + # _partitions: List of OTDS partitions (for users and groups). Each element + # is a dict with these keys: + # - enabled (bool, optional, default = True) + # - name (str, mandatory) + # - description (str, optional) + # - access_role (str, optional) + # - licenses (list, optional) _partitions = [] - # oauth_clients: list of dicts with these key / value pairs: - # - enabled (bool) - # - name (str) - # - description (str) - # - confidential (bool) - # - partition (str) - # - redirect_urls (list) + # _oauth_clients: List of OTDS OAuth Clients. 
Each element
+    # is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - name (str, mandatory)
+    # - description (str, optional, default = "")
+    # - confidential (bool, optional, default = True)
+    # - partition (str, optional, default = "Global")
+    # - redirect_urls (list, optional, default = [])
     # - permission_scopes (list)
     # - default_scopes (list)
-    # - allow_impersonation (bool)
+    # - allow_impersonation (bool, optional, default = True)
+    # - secret (str, optional, default = "") - option to provide a predefined secret
     _oauth_clients = []
-    # oauth_handlers: list of dicts with these key / value pairs:
-    # - enabled (bool)
-    # - name (str)
-    # - description (str)
-    # - scope (str)
-    # - type (str) - like SAML, SAP, OAUTH
-    # - priority (int)
-    # - active_by_default (bool)
+    # _oauth_handlers: List of OTDS OAuth handlers. Each element
+    # is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - name (str, mandatory)
+    # - description (str, optional)
+    # - scope (str, optional, default = None)
+    # - type (str, mandatory) - handler type, like SAML, SAP, OAUTH
+    # - priority (int, optional)
+    # - active_by_default (bool, optional, default = False)
+    # - provider_name (str, mandatory for type = SAML and type = SAP)
     # - auth_principal_attributes (list)
-    # - saml_url (str)
-    # - otds_sp_endpoint (str)
-    # - nameid_format (str)
+    # - nameid_format (str, optional)
+    # - saml_url (str, mandatory for type = SAML)
+    # - otds_sp_endpoint (str, mandatory for type = SAML)
+    # - certificate_file (str, mandatory for type = SAP)
+    # - certificate_password (str, mandatory for type = SAP)
+    # - client_id (str, mandatory for type = OAUTH)
+    # - client_secret (str, mandatory for type = OAUTH)
+    # - authorization_endpoint (str, mandatory for type = OAUTH)
+    # - token_endpoint (str, optional for type = OAUTH)
+    # - scope_string (str, optional)
     _auth_handlers = []
-    # trusted_sites: list of dicts with these key / value pairs:
-    # - enabled (bool)
+    # _trusted_sites: List of OTDS trusted sites. Each element
+    # is a dict with these keys:
+    # - enabled (bool, optional, default = True)
     # - url (str)
     _trusted_sites = []
-    # system_attributes: list of dicts with these key / value pairs:
-    # - enabled (bool)
-    # - name (str)
-    # - value (str)
-    # - description (str)
+    # _system_attributes: List of OTDS System Attributes. Each element
+    # is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - name (str, mandatory)
+    # - value (str, mandatory)
+    # - description (str, optional)
     _system_attributes = []
-    # groups: List of groups. List items are dicts with these key / value pairs:
-    # - name (str),
-    # - parent_groups (list),
-    # - enable_o365 (bool)
+    # _groups: List of groups. Each element
+    # is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - name (str, mandatory),
+    # - parent_groups (list, optional),
+    # - enable_o365 (bool, optional, default = False)
+    # - enable_salesforce (bool, optional, default = False)
+    # - enable_core_share (bool, optional, default = False)
     _groups = []
-    # users: List of users. List items are dicts with these key / value pairs:
-    # - enabled (bool)
-    # - name (str) (= login)
-    # - password (str)
-    # - firstname (str)
-    # - lastname (str)
-    # - email (str)
-    # - base_group (str)
-    # - groups (list)
-    # - favorites (list of str)
-    # - security_clearance (int)
+    # _users: List of users.
Each element + # is a dict with these keys: + # - enabled (bool, optional, default = True) + # - name (str, mandatory) (= login) + # - password (str, optional, will be generated if not provided) + # - firstname (str, optional, default = "") + # - lastname (str, optional, default = "") + # - title (str, optional, default = "") + # - email (str, optional, default = "") + # - base_group (str, optional, default = "DefaultGroup") + # - company (str, optional, default = "Innovate") - currently used for Salesforce users only + # - privileges (list, optional, default = ["Login", "Public Access"]) + # - groups (list, optional) + # - favorites (list of str, optional) + # - security_clearance (int, optional) # - supplemental_markings (list of str) - # - enable_sap (bool) - # - enable_o365 (bool) - # - m365_skus (list of str) + # - location (str, optional, default = "US") - only relevant for M365 users + # - enable_sap (bool, optional, default = False) + # - enable_successfactors (bool, optional, default = False) + # - enable_salesforce (bool, optional, default = False) + # - enable_o365 (bool, optional, default = False) + # - enable_core_share (bool, optional, default = False) + # - m365_skus (list of str) - only relevant for M365 users # - extra_attributes (list of dict) _users = [] + _user_customization = True - # admin_settings: list of admin settings (XML file to import) - # - enabled (bool) - # - description (str) - # - filename (str) - without path + # _admin_settings: list of admin settings (XML file to import). Each element + # is a dict with these keys: + # - enabled (bool, optional, default = True) + # - description (str, optional) + # - filename (str, mandatory) - without path _admin_settings = [] _admin_settings_post = [] # exec_pod_commands: list of commands to be executed in the pods # list elements need to be dicts with pod name, command, etc. + # - enabled (bool, optional, default = True) + # - command (str, mandatory) + # - pod_name (str, mandatory) + # - description (str, optional) + # - interactive (bool, optional, default = False) _exec_pod_commands = [] - # external_systems (list): List of external systems. Each list element is a dict with - # - enabled (bool) - # - external_system_type (str) - # - external_system_name (str) + # external_systems (list): List of external systems. 
Each element
+    # is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - external_system_type (str, mandatory) - possible values: SuccessFactors, SAP, Salesforce, Appworks Platform, Business Scenario Sample
+    # - external_system_name (str, mandatory) - for SAP this is the System ID
     # - external_system_number (str)
     # - description (str)
-    # - as_url (str)
-    # - base_url (str)
-    # - client (str)
-    # - username (str)
-    # - password (str)
-    # - certificate_file (str)
-    # - certificate_password (str)
-    # - destination (str)
-    # - archive_logical_name (str)
-    # - archive_certificate_file (str)
+    # - as_url (str, mandatory)
+    # - base_url (str, optional, default = "")
+    # - client (str, optional - only relevant for SAP, default = 100)
+    # - destination (str, optional - only relevant for SAP, default = "")
+    # - group (str, optional - only relevant for SAP, default = "PUBLIC")
+    # - username (str, optional - depends on external_system_type)
+    # - password (str, optional - depends on external_system_type)
+    # - certificate_file (str, optional - only relevant for SAP, used for Auth Handler)
+    # - certificate_password (str, optional - only relevant for SAP, used for Auth Handler)
+    # - external_system_hostname (str, mandatory - only relevant for SAP)
+    # - archive_logical_name (str, optional - only relevant for SAP)
+    # - archive_certificate_file (str, optional - only relevant for SAP)
+    # - oauth_client_id (str, optional)
+    # - oauth_client_secret (str, optional)
+    # - skip_connection_test (bool, optional, default = False)
     _external_systems = []
-    # transport_packages (list): List of transport packages systems. Each list element is a
-    # dict with "url", "name", and "description" keys.
+    # _transport_packages (list): List of transport packages. Each list element is a
+    # dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - name (str, mandatory)
+    # - url (str, mandatory)
+    # - description (str, optional, default = "")
+    # - replacements (list, optional, default = None)
+    # - extractions (list, optional, default = None)
     _transport_packages = []
     _content_transport_packages = []
     _transport_packages_post = []
+    # _business_object_types (list): Business object types are not in payload
+    # but retrieved from transport package:
     _business_object_types = []
+
+    # _workspace_types (list): Workspace types are not in payload but imported with transport package:
     _workspace_types = []
+
+    # _workspace_templates (list): actually these are also imported via transport
+    # but used if we want to define standard members on template basis. Each element
+    # is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - type_name (str, mandatory)
+    # - template_name (str, mandatory)
+    # - members (list, mandatory)
+    #   * role (str, mandatory)
+    #   * users (list, optional, default = [])
+    #   * groups (list, optional, default = [])
     _workspace_templates = []
+
+    # _workspaces (list): list of Extended ECM business workspaces. Each element
+    # is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - id (str, mandatory) - logical ID of the workspace - used for cross-referencing inside the payload. This is NOT the node ID!
+ # - name (str, mandatory) + # - description (str, optional, default = "") + # - type_name (str, mandatory) + # - template_name (str, optional, default = just take first template) + # - business_objects (list, optional, default = []) + # - parent_id (str, optional, default = None) - this is a LOGICAL ID used in the payload - not the node ID! + # - parent_path (list, optional, default = None) + # - categories (list, optional, default = None) + # * name (str, mandatory) + # * set (str, default = "") + # * row (int, optional) + # * attribute (str, mandatory) + # * value (str, mandatory) + # - nickname (str, optional, default = ignore) + # - photo_nickname (str, optional, default = ignore) + # - rm_classification_path (list, optional, default = []) + # - classification_pathes (list of lists, optional, default = []) + # - members (list, optional, default = []) + # * role (name) + # * users (list, optional, default = []) + # * groups (list, optional, default = []) + # - relationships (list, optional, default = []) - list of strings with logical workspace IDs _workspaces = [] + + # _sap_rfcs (list). Each element + # is a dict with these keys: + # - enabled (bool, optional, default = True) + # - description (str) + # - parameters (dict) + # - call_options (dict) _sap_rfcs = [] + + # _web_reports (list). Each element + # is a dict with these keys: + # - enabled (bool, optional, default = True) + # - nickname (str, mandatory) + # - description (str, optional, default = "") + # - restart (bool, optional, default = False) + # - parameters (dict, optional, default = {}) - the dict keys are the parameter names and the dict values are the actual parameter values _web_reports = [] _web_reports_post = [] - # cs_applications (list): List of Content Server Applications to deploy. - # - enabled (bool) - # - name (str) - # - descriptions (str) + # _cs_applications (list): List of Content Server Applications to deploy. + # Each list element is a dict with + # - enabled (bool, optional, default = True) + # - name (str, mandatory) + # - descriptions (str, optional, default = "") _cs_applications = [] - # additional_group_members: List of memberships to establish. Each element + # _additional_group_members: List of memberships to establish. Each element # is a dict with these keys: # - parent_group (string) # - user_name (string) # - group_name (string) _additional_group_members = [] - # additional_access_role_members: List of memberships to establish. Each element + # _additional_access_role_members: List of memberships to establish. Each element # is a dict with these keys: # - access_role (string) # - user_name (string) # - group_name (string) # - partition_name (string) _additional_access_role_members = [] + + # _renamings (list). List of items to be renamed. 
Each element
+    # is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - name (str, mandatory)
+    # - nodeid (int, mandatory if no volume is specified) - this is the technical OTCS ID - typically only known for some preinstalled items
+    # - volume (int, mandatory if no nodeid is specified)
     _renamings = []
-    # items: List of items to create in Extended ECM
-    # - enabled (bool)
+    # _items: List of items to create in Extended ECM
+    # - enabled (bool, optional, default = True)
     # - parent_nickname (str)
     # - parent_path (list)
     # - name (str)
-    # - description (str)
+    # - description (str, optional, default = "")
     # - type (str)
     # - url (str) - "" means not set
     # - original_nickname
@@ -343,46 +530,299 @@ class Payload:
     _items = []
     _items_post = []
-    # permissions: List of permissions changes to apply
-    # - path (list)
-    # - volume (int)
-    # - public_permissions (list)
-    # - groups (list)
+    # _permissions: List of permissions changes to apply
+    # - path (list, optional)
+    # - volume (int, optional)
+    # - nickname (str, optional)
+    # - owner_permissions (list, optional)
+    # - owner_group_permissions (list, optional)
+    # - public_permissions (list, optional)
+    # - groups (list, optional)
     #   + name (str)
     #   + permissions (list)
-    # - users (list)
+    # - users (list, optional)
     #   + name (str)
     #   + permissions (list)
     # - apply_to (int)
     _permissions = []
     _permissions_post = []
-    # assignments: List of assignments. Each element is a dict with these keys:
-    # - subject (string)
-    # - instruction (string)
-    # - workspace (string)
-    # - nickname (string)
+    # _assignments: List of assignments. Each element is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - subject (str, mandatory)
+    # - instruction (str, optional)
+    # - workspace (str, optional if nickname is specified)
+    # - nickname (str, optional if workspace is specified)
     # - groups (list)
     # - users (list)
     _assignments = []
-    _workspace_template_registrations = []
+
+    # _doc_generators: List of document generators that use the Document Template capabilities
+    # of Extended ECM. Each element is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - workspace_type (str, mandatory)
+    # - template_path (list, mandatory)
+    # - classification_path (list, mandatory)
+    # - category_name (str, optional, default = "")
+    # - workspace_folder_path (list, optional, default = []) - default puts the document in the workspace root
+    # - exec_as_user (str, optional, default = "")
+    _doc_generators = []
+
+    # _browser_automations: List of browser automations for things that can only be
+    # automated via the web user interface. Each element is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - name (str, mandatory)
+    # - base_url (str, mandatory)
+    # - user_name (str, optional)
+    # - password (str, optional)
+    # - automations (list, mandatory)
+    #   * type (str, optional, default = "")
+    #   * page (str, optional, default = "")
+    #   * elem (str, optional, default = "")
+    #   * find (str, optional, default = "id")
+    #   * value (str, optional, default = "")
+    # - wait-time (float, optional, default = 15.0) - wait time in seconds
+    # - debug (bool, optional, default = False) - if True take screenshots and save to container
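+    # Example element (hypothetical values and automation types, for illustration only):
+    # {
+    #     "name": "enable-feature-x",
+    #     "base_url": "https://otcs.example.com",
+    #     "user_name": "admin",
+    #     "password": "<secret>",
+    #     "automations": [
+    #         {"type": "login", "page": "/cs/cs"},
+    #         {"type": "click_elem", "elem": "submit-button", "find": "id"},
+    #     ],
+    # }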
+    _browser_automations = []
+    _browser_automations_post = []
+
+    # _security_clearances: List of Security Clearances. Each element is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - name (str, mandatory)
+    # - level (str, mandatory)
+    # - description (str, optional, default = "")
+    _security_clearances = []
+
+    # _supplemental_markings: List of supplemental markings. Each element is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - code (str, mandatory)
+    # - description (str, optional, default = "")
+    _supplemental_markings = []
+
+    _records_management_settings = []
+
+    # _holds: List of Records Management holds. Each element is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - name (str, mandatory)
+    # - type (str, mandatory)
+    # - group (str, optional)
+    # - comment (str, optional, default = "")
+    # - alternate_id (str, optional)
+    # - date_applied (str, optional, default = "")
+    # - date_to_remove (str, optional, default = "")
    _holds = []
-    _doc_generators = []
-    _browser_automations = []
-    _browser_automations_post = []
+
+    # _bulk_datasources: list of bulk datasources. Each element
+    # is a dict with these keys:
+    # - data (Data object), this is not in payload but automatically filled
+    # - type (str, mandatory) - either excel, csv, servicenow, otmm, otcs, pht, json, xml
+    # - csv_files (list, mandatory if type = csv)
+    # - json_files (list, mandatory if type = json)
+    # - xml_files (list, optional, default = []) - only relevant for type = xml
+    # - xml_directories (list, optional, default = []) - only relevant for type = xml
+    # - xml_xpath (str, optional, default = None) - only relevant if xml_directories is set
+    # - xlsx_files (list, optional, default = [])
+    # - xlsx_sheets (list, optional, default = 0)
+    # - xlsx_columns (list, optional, default = None)
+    # - xlsx_skip_rows (int, optional, default = 0) - number of rows to skip on top of sheet
+    # - xlsx_na_values (list, optional, default = [])
+    # - pht_base_url (str, mandatory if type = pht)
+    # - pht_username (str, mandatory if type = pht)
+    # - pht_password (str, mandatory if type = pht)
+    # - otmm_username (str, optional, default = "")
+    # - otmm_password (str, optional, default = "")
+    # - otmm_client_id (str, optional, default = None)
+    # - otmm_client_secret (str, optional, default = None)
+    # - otmm_thread_number (int, optional, default = BULK_THREAD_NUMBER)
+    # - otmm_download_dir (str, optional, default = "/data/mediaassets")
+    # - otmm_business_unit_exclusions (list, optional, default = [])
+    # - otmm_product_exclusions (list, optional, default = [])
+    # - sn_base_url (str, mandatory if type = servicenow)
+    # - sn_auth_type (str, optional, default = "basic")
+    # - sn_username (str, optional, default = "")
+    # - sn_password (str, optional, default = "")
+    # - sn_client_id (str, optional, default = None)
+    # - sn_client_secret (str, optional, default = None)
+    # - sn_table_name (str, optional, default = "u_kb_template_technical_article_public")
+    # - sn_query (str, optional, default = None)
+    # - sn_thread_number (int, optional, default = BULK_THREAD_NUMBER)
+    # - sn_download_dir (str, optional, default = "/data/knowledgebase")
+    # - otcs_hostname (str, mandatory if type = otcs)
+    # - otcs_protocol (str, optional, default = "https")
+    # - otcs_port (str, optional, default = "443")
+    # - otcs_basepath (str, optional, default = "/cs/cs")
+    # - otcs_username (str, mandatory if type = otcs)
+    # - otcs_password (str, mandatory if type = otcs)
+    # - otcs_thread_number (int, optional, default = BULK_THREAD_NUMBER)
+    # - otcs_download_dir (str, optional, default = "/data/contentserver")
+    # - otcs_root_node_id (int, mandatory if type = otcs)
+    # - otcs_filter_workspace_depth (int, optional, default = 0)
+    # - otcs_filter_workspace_subtype (int, optional, default = 0)
+    # - otcs_filter_workspace_category (str, optional, default = None)
+    # - otcs_filter_workspace_attributes (dict | list, optional, default = None)
+    # - cleansings (dict, optional, default = {}) - the keys of this dict are the field names! The values of the dict are sub-dicts with these keys:
+    #   * upper (bool, optional, default = False)
+    #   * lower (bool, optional, default = False)
+    #   * length (int, optional, default = None)
+    #   * replacements (dict, optional, default = {}) - the keys are regular expressions and the values are replacement values
+    # - columns_to_drop (list, optional, default = [])
+    # - columns_to_keep (list, optional, default = [])
+    # - columns_to_add (list, optional, default = []) - elements are dicts with these keys:
+    #   * source_column (str, mandatory)
+    #   * name (str, mandatory)
+    #   * reg_exp (str, optional, default = None)
+    #   * prefix (str, optional, default = "")
+    #   * suffix (str, optional, default = "")
+    #   * length (int, optional, default = None)
+    #   * group_chars (str, optional, default = None)
+    #   * group_separator (str, optional, default = ".")
+    # - conditions (list, optional, default = []) - each list item is a dict with these keys:
+    #   * field (str, mandatory)
+    #   * value (str | bool | list, optional, default = None)
+    # - explosions (list, optional, default = []) - each list item is a dict with these keys:
+    #   * explode_field (str | list, mandatory)
+    #   * flatten_fields (list, optional, default = [])
+    #   * split_string_to_list (bool, optional, default = False)
+    # - name_column (str, optional, default = None)
+    # - synonyms_column (str, optional, default = None)
+    _bulk_datasources = []
+
+    # _bulk_workspaces: List of bulk workspace definitions. Each element
+    # is a dict with these keys:
+    # - enabled (bool, optional, default = True)
+    # - type_name (str, mandatory) - type of the workspace
+    # - data_source (str, mandatory)
+    # - force_reload (bool, optional, default = True)
+    # - enforce_updates (bool, optional, default = False)
+    # - unique (list, optional, default = []) - list of fields (columns) that should be unique -> deduplication
+    # - sort (list, optional, default = []) - list of fields to sort the data frame by
+    # - name (str, mandatory)
+    # - description (str, optional, default = "")
+    # - template_name (str, optional, default = take first template)
+    # - categories (list, optional, default = []) - each list item is a dict that may have these keys:
+    #   * name (str, mandatory)
+    #   * set (str, default = "")
+    #   * row (int, optional)
+    #   * attribute (str, mandatory)
+    #   * value (str, optional if value_field is specified, default = None)
+    #   * value_field (str, optional if value is specified, default = None) - can include placeholder surrounded by {...}
+    #   * value_type (str, optional, default = "string") - values can be string or list, if list then string with comma-separated values will be converted to a list
+    #   * list_splitter (str, optional, default = ";,")
+    #   * lookup_data_source (str, optional, default = None)
+    #   * is_key (bool, optional, default = False) - find workspace with old name.
+
+ # _bulk_workspaces: List of bulk workspace definitions. Each element
+ # is a dict with these keys:
+ # - enabled (bool, optional, default = True)
+ # - type_name (str, mandatory) - type of the workspace
+ # - data_source (str, mandatory)
+ # - force_reload (bool, optional, default = True)
+ # - enforce_updates (bool, optional, default = False)
+ # - unique (list, optional, default = []) - list of fields (columns) that should be unique -> deduplication
+ # - sort (list, optional, default = []) - list of fields to sort the data frame by
+ # - name (str, mandatory)
+ # - description (str, optional, default = "")
+ # - template_name (str, optional, default = take first template)
+ # - categories (list, optional, default = []) - each list item is a dict that may have these keys:
+ # * name (str, mandatory)
+ # * set (str, default = "")
+ # * row (int, optional)
+ # * attribute (str, mandatory)
+ # * value (str, optional if value_field is specified, default = None)
+ # * value_field (str, optional if value is specified, default = None) - can include placeholder surrounded by {...}
+ # * value_type (str, optional, default = "string") - values can be string or list, if list then a string with comma-separated values will be converted to a list
+ # * list_splitter (str, optional, default = ";,")
+ # * lookup_data_source (str, optional, default = None)
+ # * is_key (bool, optional, default = False) - find workspace with old name. For this we expect a "key" value to be defined in the bulk workspace and one of the category / attribute items to be marked with "is_key" = True
+ # - workspaces (dict, dynamically built up, default = {}) - dictionary of already generated workspaces
+ # - external_create_date (str, optional, default = "")
+ # - external_modify_date (str, optional, default = "")
+ # - key (str, optional, default = None) - lookup key for workspaces other than the name
+ # - replacements (dict, optional, default = {}) - each dictionary item has the field name as the dictionary key and a list of regular expressions as dictionary value
+ # - nickname (str, optional, default = None)
+ # - conditions (list, optional, default = []) - each list item is a dict with these keys:
+ # * field (str, mandatory)
+ # * value (str | bool | list, optional, default = None)
+ _bulk_workspaces = []
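+
+ # Illustrative example of a single bulk workspace payload element
+ # (workspace type, data source, and category names are hypothetical):
+ #
+ #   {
+ #       "type_name": "Product",
+ #       "data_source": "products",
+ #       "name": "{ProductName}",
+ #       "description": "{ProductDescription}",
+ #       "unique": ["SKU"],
+ #       "categories": [
+ #           {"name": "Product Data", "attribute": "SKU", "value_field": "{SKU}"},
+ #       ],
+ #   }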
+
+ # _bulk_workspace_relationships: List of bulk workspace relationships. Each element
+ # is a dict with these keys:
+ # - enabled (bool, optional, default = True)
+ # - from_workspace (str, mandatory)
+ # - from_workspace_type (str, optional, default = None)
+ # - from_workspace_name (str, optional, default = None)
+ # - from_workspace_data_source (str, optional, default = None)
+ # - to_workspace (str, mandatory)
+ # - to_workspace_type (str, optional, default = None)
+ # - to_workspace_name (str, optional, default = None)
+ # - to_workspace_data_source (str, optional, default = None)
+ # - relationship_type (str, optional, default = "child")
+ # - data_source (str, mandatory)
+ # - copy_data_source (bool, optional, default = False) - to avoid side effects for repetitive usage of the data source
+ # - explosions (list, optional, default = []) - each list item is a dict with these keys:
+ # * explode_field (str | list, mandatory)
+ # * flatten_fields (list, optional, default = [])
+ # * split_string_to_list (bool, optional, default = False)
+ # - unique (list, optional, default = [])
+ # - sort (list, optional, default = [])
+ # - thread_number (int, optional, default = BULK_THREAD_NUMBER)
+ # - replacements (list, optional, default = None)
+ # - conditions (list, optional, default = None) - each list item is a dict with these keys:
+ # * field (str, mandatory)
+ # * value (str | bool | list, optional, default = None)
+ _bulk_workspace_relationships = []
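+
+ # Illustrative example of a single bulk workspace relationship payload
+ # element (workspace types and data source are hypothetical; field
+ # semantics follow the key list above):
+ #
+ #   {
+ #       "from_workspace": "{SupplierID}",
+ #       "from_workspace_type": "Supplier",
+ #       "to_workspace": "{SKU}",
+ #       "to_workspace_type": "Product",
+ #       "relationship_type": "child",
+ #       "data_source": "products",
+ #   }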
+
+ # _bulk_documents (list): List of bulk document payload. Each element
+ # is a dict with these keys:
+ # - enabled (bool, optional, default = True)
+ # - data_source (str, mandatory)
+ # - explosions (list of dicts, optional, default = [])
+ # - unique (list, optional, default = []) - list of fields (columns) that should be unique -> deduplication
+ # - sort (list, optional, default = []) - list of fields to sort the data frame by
+ # - enforce_updates (bool, optional, default = False)
+ # - name (str, mandatory) - can include placeholder surrounded by {...}
+ # - name_alt (str, optional, default = None) - can include placeholder surrounded by {...}
+ # - description (str, optional, default = None) - can include placeholder surrounded by {...}
+ # - download_name (str, optional, default = name) - can include placeholder surrounded by {...}
+ # - nickname (str, optional, default = None) - can include placeholder surrounded by {...}
+ # - download_url (str, optional, default = None)
+ # - download_url_alt (str, optional, default = None)
+ # - download_dir (str, optional, default = BULK_DOCUMENT_PATH)
+ # - delete_download (bool, optional, default = True)
+ # - file_extension (str, optional, default = "")
+ # - file_extension_alt (str, optional, default = "html")
+ # - mime_type (str, optional, default = "application/pdf")
+ # - mime_type_alt (str, optional, default = "text/html")
+ # - categories (list, optional, default = []) - each list item is a dict that may have these keys:
+ # * name (str, mandatory)
+ # * set (str, default = "")
+ # * row (int, optional)
+ # * attribute (str, mandatory)
+ # * value (str, optional if value_field is specified, default = None)
+ # * value_field (str, optional if value is specified, default = None) - can include placeholder surrounded by {...}
+ # * value_type (str, optional, default = "string") - values can be string or list, if list then a string with comma-separated values will be converted to a list
+ # * list_splitter (str, optional, default = ";,")
+ # * lookup_data_source (str, optional, default = None)
+ # * is_key (bool, optional, default = False) - find document with old name. For this we expect a "key" value to be defined for the bulk document and one of the category / attribute items to be marked with "is_key" = True
+ # - thread_number (int, optional, default = BULK_THREAD_NUMBER)
+ # - external_create_date (str, optional, default = "")
+ # - external_modify_date (str, optional, default = "")
+ # - key (str, optional, default = None) - lookup key for documents other than the name
+ # - download_wait_time (int, optional, default = 30)
+ # - download_retries (int, optional, default = 2)
+ # - replacements (list, optional, default = [])
+ # - conditions (list, optional, default = []) - all conditions must evaluate to true
+ # * field (str, mandatory)
+ # * value (str | bool | list, optional, default = None)
+ # - workspaces (list, optional, default = [])
+ # * workspace_name (str, mandatory)
+ # * conditions (list, optional, default = [])
+ #   + field (str, mandatory)
+ #   + value (str | bool | list, optional, default = None)
+ # * workspace_type (str, mandatory)
+ # * datasource (str, optional, default = None)
+ # * workspace_folder (str, optional, default = "")
+ # * workspace_path (list, optional, default = [])
+ # * sub_workspace_type (str, optional, default = "")
+ # * sub_workspace_name (str, optional, default = "")
+ # * sub_workspace_template (str, optional, default = "")
+ # * sub_workspace_folder (str, optional, default = "")
+ # * sub_workspace_path (list, optional, default = [])
+ _bulk_documents = []
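+
+ # Illustrative example of a single bulk document payload element
+ # (URLs, field names, and workspace names are hypothetical):
+ #
+ #   {
+ #       "data_source": "products",
+ #       "name": "{SKU}-datasheet",
+ #       "download_url": "{DatasheetURL}",
+ #       "mime_type": "application/pdf",
+ #       "workspaces": [
+ #           {
+ #               "workspace_name": "{ProductName}",
+ #               "workspace_type": "Product",
+ #               "workspace_folder": "Datasheets",
+ #           },
+ #       ],
+ #   }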
_placeholder_values = {}
+ # Link to the method in customizer.py to restart the Content Server pods.
_otcs_restart_callback: Callable
+
+ # Link to the method in customizer.py to print a log header (separator line)
_log_header_callback: Callable
+
_aviator_enabled = False
_transport_extractions: list = []
_transport_replacements: list = []
+ # Flag to enable / disable the upload of status files
+ upload_status_files: bool = True
+
def __init__(
self,
payload_source: str,
@@ -395,11 +835,13 @@ def __init__(
otcs_restart_callback: Callable,
otiv_object: OTIV | None,
m365_object: M365 | None,
+ core_share_object: CoreShare | None,
browser_automation_object: BrowserAutomation | None,
placeholder_values: dict,
log_header_callback: Callable,
stop_on_error: bool = False,
aviator_enabled: bool = False,
+ upload_status_files: bool = True,
):
"""Initialize the Payload object
@@ -432,10 +874,15 @@ def __init__(
self._otcs_frontend = otcs_frontend_object
self._otiv = otiv_object
self._m365 = m365_object
- self._sap = (
- None # this object only exists after external systems have been processed
- )
+ self._core_share = core_share_object
+ # The SAP, SuccessFactors and Salesforce objects only exist after external systems have been processed
+ self._sap = None
+ self._successfactors = None
self._salesforce = None
+ self._servicenow = None
+ self._otmm = None
+ self._otcs_source = None
+ self._pht = None  # the OpenText product hierarchy
self._browser_automation = browser_automation_object
self._custom_settings_dir = custom_settings_dir
self._placeholder_values = placeholder_values
@@ -443,6 +890,18 @@ def __init__(
self._log_header_callback = log_header_callback
self._aviator_enabled = aviator_enabled
self._http_object = HTTP()
+ self.upload_status_files = upload_status_files
+
+ # end method definition
+
+ def thread_wrapper(self, target, *args, **kwargs):
+     """Function to wrap around threads to catch exceptions during execution"""
+     try:
+         target(*args, **kwargs)
+     except Exception as e:
+         thread_name = threading.current_thread().name
+         logger.error("Thread %s: failed with exception %s", thread_name, e)
+         logger.error(traceback.format_exc())
# end method definition
@@ -499,27 +958,29 @@ def init_payload(self) -> dict | None:
"""
if not os.path.exists(self._payload_source):
- logger.error("Cannot access payload file -> %s", self._payload_source)
+ logger.error("Cannot access payload file -> '%s'", self._payload_source)
return None
# Is it a YAML file?
if self._payload_source.endswith(".yaml"):
- logger.info("Open payload from YAML file -> %s", self._payload_source)
+ logger.info("Open payload from YAML file -> '%s'", self._payload_source)
try:
with open(self._payload_source, "r", encoding="utf-8") as stream:
payload_data = stream.read()
self._payload = yaml.safe_load(payload_data)
except yaml.YAMLError as exception:
logger.error(
- "Error while reading YAML payload file -> %s; error -> %s",
+ "Error while reading YAML payload file -> '%s'; error -> %s",
self._payload_source,
exception,
)
self._payload = {}
# Or is it a Terraform HCL file?
- elif self._payload_source.endswith(".tf"):
+ elif self._payload_source.endswith(".tf") or self._payload_source.endswith(
+ ".tfvars"
+ ):
logger.info(
- "Open payload from Terraform HCL file -> %s", self._payload_source
+ "Open payload from Terraform HCL file -> '%s'", self._payload_source
)
try:
with open(self._payload_source, "r", encoding="utf-8") as stream:
@@ -529,7 +990,7 @@ def init_payload(self) -> dict | None:
self._payload = self._payload["external_payload"]
except FileNotFoundError as exception:
logger.error(
- "Error while reading Terraform HCL payload file -> %s; error -> %s",
+ "Error while reading Terraform HCL payload file -> '%s'; error -> %s",
self._payload_source,
exception,
)
@@ -537,7 +998,7 @@
elif self._payload_source.endswith(".yml.gz.b64"):
logger.info(
- "Open payload from base64-gz-YAML file -> %s", self._payload_source
+ "Open payload from base64-gz-YAML file -> '%s'", self._payload_source
)
try:
with open(self._payload_source, "r", encoding="utf-8") as stream:
@@ -548,7 +1009,7 @@
except yaml.YAMLError as exception:
logger.error(
- "Error while reading YAML payload file -> %s; error -> %s",
+ "Error while reading YAML payload file -> '%s'; error -> %s",
self._payload_source,
exception,
)
@@ -557,7 +1018,7 @@
# If not, it is an unsupported type:
else:
logger.error(
- "File -> %s has unsupported file type",
+ "File -> '%s' has unsupported file type",
self._payload_source,
)
self._payload = {}
@@ -567,7 +1028,7 @@
if not self._payload_sections:
logger.error(
- "Sections for payload -> %s are undefined. Skipping...",
+ "Sections for payload -> '%s' are undefined. Skipping...",
Skipping...", self._payload_source, ) return None @@ -583,6 +1044,7 @@ def init_payload(self) -> dict | None: self._groups = self.get_payload_section("groups") self._users = self.get_payload_section("users") self._admin_settings = self.get_payload_section("adminSettings") + self._admin_settings_post = self.get_payload_section("adminSettingsPost") self._exec_pod_commands = self.get_payload_section("execPodCommands") self._external_systems = self.get_payload_section("externalSystems") self._transport_packages = self.get_payload_section("transportPackages") @@ -594,11 +1056,16 @@ def init_payload(self) -> dict | None: ) self._workspace_templates = self.get_payload_section("workspaceTemplates") self._workspaces = self.get_payload_section("workspaces") + self._bulk_datasources = self.get_payload_section("bulkDatasources") + self._bulk_workspaces = self.get_payload_section("bulkWorkspaces") + self._bulk_workspace_relationships = self.get_payload_section( + "bulkWorkspaceRelationships" + ) + self._bulk_documents = self.get_payload_section("bulkDocuments") self._sap_rfcs = self.get_payload_section("sapRFCs") self._web_reports = self.get_payload_section("webReports") self._web_reports_post = self.get_payload_section("webReportsPost") self._cs_applications = self.get_payload_section("csApplications") - self._admin_settings_post = self.get_payload_section("adminSettingsPost") self._additional_group_members = self.get_payload_section( "additionalGroupMemberships" ) @@ -624,7 +1091,8 @@ def init_payload(self) -> dict | None: ) return self._payload - # end method definition + + # end method definition def get_payload_section(self, payload_section_name: str) -> list: """Get a defined section of the payload. The section is delivered as a list of settings. @@ -658,7 +1126,7 @@ def get_payload_section(self, payload_section_name: str) -> list: return self._payload[payload_section_name] - # end method definition + # end method definition def get_all_group_names(self) -> list: """Construct a list of all group name @@ -668,7 +1136,7 @@ def get_all_group_names(self) -> list: """ return [group.get("name") for group in self._groups] - # end method definition + # end method definition def get_status_file_name( self, @@ -685,6 +1153,7 @@ def get_status_file_name( each payload file or if success is "global" - like for the deletion of the existing M365 teams (which we don't want to execute per payload file) + prefix (str, optional): prefix of the file. Typically, either "success_" or "failure_" Returns: str: name of the payload section file @@ -704,10 +1173,13 @@ def get_status_file_name( return file_name - # end method definition + # end method definition def check_status_file( - self, payload_section_name: str, payload_specific: bool = True + self, + payload_section_name: str, + payload_specific: bool = True, + prefix: str = "success_", ) -> bool: """Check if the payload section has been processed before. This is done by checking the existance of a text file in the Admin Personal @@ -720,13 +1192,20 @@ def check_status_file( each payload file or if success is "global" - like for the deletion of the existing M365 teams (which we don't want to execute per payload file) + prefix (str, optional): prefix of the file. 
Returns:
bool: True if the payload has been processed successfully before, False otherwise
"""
+ if prefix == "success_":
+ message = "successfully"
+ else:
+ message = "with failures"
+
logger.info(
- "Check if payload section -> %s has been processed successfully before...",
+ "Check if payload section -> '%s' has been processed %s before...",
payload_section_name,
+ message,
)
response = self._otcs.get_node_by_volume_and_path(
@@ -737,7 +1216,9 @@
target_folder_id = 2004  # use Personal Workspace of Admin as fallback
file_name = self.get_status_file_name(
- payload_section_name=payload_section_name, payload_specific=payload_specific
+ payload_section_name=payload_section_name,
+ payload_specific=payload_specific,
+ prefix=prefix,
)
status_document = self._otcs.get_node_by_parent_and_name(
@@ -747,17 +1228,19 @@
name = self._otcs.get_result_value(status_document, "name")
if name == file_name:
logger.info(
- "Payload section -> %s has been processed successfully before. Skipping...",
+ "Payload section -> '%s' has been processed %s before. Skipping...",
payload_section_name,
+ message,
)
return True
logger.info(
- "Payload section -> %s has not been processed successfully before. Processing...",
+ "Payload section -> '%s' has not been processed %s before. Processing...",
payload_section_name,
+ message,
)
return False
- # end method definition
+ # end method definition
def write_status_file(
self,
@@ -783,15 +1266,18 @@
bool: True if the status file has been uploaded to Extended ECM successfully, False otherwise
"""
+ if not self.upload_status_files:
+ return True
+
if success:
logger.info(
- "Payload section -> %s has been completed successfully!",
+ "Payload section -> '%s' has been completed successfully!",
payload_section_name,
)
prefix = "success_"
else:
logger.error(
- "Payload section -> %s had failures!",
+ "Payload section -> '%s' had failures!",
payload_section_name,
)
prefix = "failure_"
@@ -839,22 +1325,25 @@
if response:
logger.info(
- "Status file -> %s has been written to Personal Workspace of admin user",
+ "Status file -> '%s' has been written to Personal Workspace of admin user",
file_name,
)
return True
logger.error(
- "Failed to write status file -> %s to Personal Workspace of admin user",
+ "Failed to write status file -> '%s' to Personal Workspace of admin user",
file_name,
)
return False
- # end method definition
+ # end method definition
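+
+ # Illustrative usage of the status file mechanism that makes payload
+ # processing idempotent (the section name "partitions" mirrors the
+ # process_partitions() method below):
+ #
+ #   if self.check_status_file(payload_section_name="partitions"):
+ #       return True   # processed successfully before - skip this section
+ #   success = ...     # process the payload section
+ #   self.write_status_file(success, "partitions", self._partitions)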
def get_status_file(
- self, payload_section_name: str, payload_specific: bool = True
+ self,
+ payload_section_name: str,
+ payload_specific: bool = True,
+ prefix: str = "success_",
) -> list | None:
"""Get the status file and read it into a dictionary.
Args:
payload_section_name (str): name of the payload section.
payload_specific (bool, optional): whether or not the file should be specific for
each payload file or if success is "global" - like for the
deletion of the existing M365 teams (which we don't want to
execute per payload file)
+ prefix (str, optional): prefix of the file. Typically, either "success_" or "failure_"
Returns:
dict: content of the status file as a dictionary or None in case of an error
"""
logger.info(
- "Get the status file of the payload section -> %s...",
+ "Get the status file of the payload section -> '%s'...",
payload_section_name,
)
@@ -882,7 +1372,9 @@
source_folder_id = 2004  # use Personal Workspace of Admin as fallback
file_name = self.get_status_file_name(
- payload_section_name=payload_section_name, payload_specific=payload_specific
+ payload_section_name=payload_section_name,
+ payload_specific=payload_specific,
+ prefix=prefix,
)
status_document = self._otcs.get_node_by_parent_and_name(
@@ -890,7 +1382,7 @@
)
status_file_id = self._otcs.get_result_value(status_document, "id")
if not status_file_id:
- logger.error("Cannot find status file -> %s", file_name)
+ logger.error("Cannot find status file -> '%s'", file_name)
return None
content = self._otcs.get_document_content(status_file_id)
@@ -905,7 +1397,113 @@
logger.error("File content is not in valid JSON format; error -> %s", e)
return None
- # end method definition
+ # end method definition
+
+ def get_payload(self) -> dict:
+     """Get the Payload"""
+     return self._payload
+
+ def get_users(self) -> list:
+     """Get all users"""
+     return self._users
+
+ def get_groups(self) -> list:
+     """Get all groups"""
+     return self._groups
+
+ def get_workspaces(self) -> list:
+     """Get all workspaces"""
+     return self._workspaces
+
+ def get_otcs_frontend(self) -> object:
+     """Get OTCS Frontend object"""
+     return self._otcs_frontend
+
+ def get_otcs_backend(self) -> object:
+     """Get OTCS Backend object"""
+     return self._otcs_backend
+
+ def get_otds(self) -> object:
+     """Get OTDS object"""
+     return self._otds
+
+ def get_k8s(self) -> object:
+     """Get K8s object"""
+     return self._k8s
+
+ def get_m365(self) -> object:
+     """Get M365 object"""
+     return self._m365
+
+ def generate_password(
+     self,
+     length: int,
+     use_special_chars: bool = False,
+     min_special: int = 1,
+     min_numerical: int = 1,
+     min_upper: int = 1,
+     min_lower: int = 1,
+     override_special: str | None = None,
+ ) -> str:
+     """Generate random passwords with a given specification
+
+     Args:
+         length (int): Define password length
+         use_special_chars (bool, optional): Define if special characters should be used. Defaults to False.
+         min_special (int, optional): Define min amount of special characters. Defaults to 1.
+         min_numerical (int, optional): Define minimum number of numerical digits. Defaults to 1.
+         min_upper (int, optional): Define minimum number of upper case letters. Defaults to 1.
+         min_lower (int, optional): Define minimum number of lower case letters. Defaults to 1.
+         override_special (string | None, optional): Define special characters to be used, if not set: !@#$%^&*()_-+=<>?/{}[]. Defaults to None.
+
+     Raises:
+         ValueError: If the sum of the minimum requirements exceeds the password length.
+
+     Returns:
+         str: Generated password
+     """
+     # Define character sets
+     lowercase_letters = string.ascii_lowercase
+     uppercase_letters = string.ascii_uppercase
+     numerical_digits = string.digits
+     special_characters = "!@#$%^&*()_-+=<>?/{}[]"
+
+     if override_special:
+         special_characters = override_special
+
+     # Ensure minimum requirements are met
+     if min_special + min_numerical + min_upper + min_lower > length:
+         raise ValueError("Minimum requirements exceed password length")
+
+     # Initialize the password
+     password = []
+
+     # Add required characters
+     password.extend(random.sample(lowercase_letters, min_lower))
+     password.extend(random.sample(uppercase_letters, min_upper))
+     password.extend(random.sample(numerical_digits, min_numerical))
+
+     if use_special_chars:
+         password.extend(random.sample(special_characters, min_special))
+
+     # Fill the rest of the password with random characters
+     remaining_length = length - len(password)
+     all_chars = lowercase_letters + uppercase_letters + numerical_digits
+
+     if use_special_chars:
+         all_chars += special_characters
+
+     password.extend(random.choices(all_chars, k=remaining_length))
+
+     # Shuffle the password to ensure randomness
+     random.shuffle(password)
+
+     # Convert the password list to a string
+     final_password = "".join(password)
+
+     return final_password
+
+ # end method definition
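+
+ # Illustrative call of generate_password() (the result is random; the
+ # value shown in the comment is made up):
+ #
+ #   password = self.generate_password(
+ #       length=12,
+ #       use_special_chars=True,
+ #       min_special=1,
+ #       min_numerical=2,
+ #       min_upper=1,
+ #       min_lower=1,
+ #   )
+ #   # e.g. "aK3x!9qpt2Vz"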
+ if "m365_id" in group: + return group["m365_id"] + + if not "name" in group: + logger.error("Group needs a name to lookup the M365 group ID.") + return None + group_name = group["name"] + + existing_group = self._m365.get_group(group_name=group_name) + existing_group_id = self._m365.get_result_value( + response=existing_group, key="id" + ) + if existing_group_id: + logger.debug( + "Found existing Microsoft 365 group -> '%s' with ID -> %s. Update m365_id in payload...", + group_name, + existing_group_id, + ) + # write back the M365 user ID into the payload + group["m365_id"] = existing_group_id + return group["m365_id"] + else: + logger.debug( + "Did not find an existing M365 group with name -> '%s'", group_name + ) + return None + + # end method definition + + def determine_group_id_core_share(self, group: dict) -> str | None: + """Determine the id of a Core Share group - either from payload or from Core Share directly + + Args: + group (dict): Payload dictionary of the group. + + Returns: + str | None: Core Share Group ID or None. + """ + + # Is the ID already known in payload? (if determined before) + if "core_share_id" in group: + return group["core_share_id"] + + if not "name" in group: + logger.error("Group needs a name to lookup the Core Share ID.") + return None + + if not isinstance(self._core_share, CoreShare): + logger.error( + "Core Share connection not setup properly.", + ) + return None + + core_share_group = self._core_share.get_group_by_name(name=group["name"]) + core_share_group_id = self._core_share.get_result_value( + response=core_share_group, key="id" + ) + + # Have we found the group? + if core_share_group_id: + logger.debug( + "Found existing Core Share group -> '%s' with ID -> %s. Update m365_id in payload...", + group["name"], + core_share_group_id, + ) + # Write ID back into the payload: + group["core_share_id"] = core_share_group_id + return group["core_share_id"] + else: + logger.debug( + "Did not find an existing Core Share group with name -> '%s'", + group["name"], + ) + return None + + # end method definition + + def determine_user_id(self, user: dict) -> int: + """Determine the id of a user - either from payload or from OTCS + If the user is found in OTCS write back the ID into the payload. + + Args: + user (dict): user payload element + Returns: + int: user ID + Side Effects: + the user items are modified by adding an "id" dict element that + includes the technical ID of the user in Extended ECM + """ + + # Is the ID already known in payload? (if determined before) + if "id" in user: + return user["id"] if not "name" in user: logger.error("User needs a login name to lookup the ID.") @@ -983,19 +1677,19 @@ def determine_user_id(self, user: dict) -> int: user["id"] = user_id return user["id"] else: - logger.info("Did not find an existing user with name -> %s", user_name) + logger.debug("Did not find an existing user with name -> '%s'", user_name) return 0 # end method definition - def determine_user_id_m365(self, user: dict) -> int: + def determine_user_id_m365(self, user: dict) -> str | None: """Determine the id of a M365 user - either from payload or from M365 via Graph API If the user is found in M365 write back the M365 user ID into the payload. Args: user (dict): user payload element Returns: - int: M365 user ID or 0 if the user is not found. + str | None: M365 user ID or None if the user is not found. 
Side Effects:
the user items are modified by adding an "m365_id" dict element that
includes the technical ID of the user in Microsoft 365
@@ -1007,14 +1701,14 @@
if not "name" in user:
logger.error("User needs a login name to lookup the M365 user ID.")
- return 0
+ return None
user_name = user["name"]
m365_user_name = user_name + "@" + self._m365.config()["domain"]
existing_user = self._m365.get_user(m365_user_name)
if existing_user:
- logger.info(
- "Found existing Microsoft 365 user -> %s (%s) with ID -> %s. Update m365_id in payload...",
+ logger.debug(
+ "Found existing Microsoft 365 user -> '%s' (%s) with ID -> %s. Update m365_id in payload...",
existing_user["displayName"],
existing_user["userPrincipalName"],
existing_user["id"],
@@ -1023,10 +1717,66 @@
user["m365_id"] = existing_user["id"]
return user["m365_id"]
else:
- logger.info("Did not find an existing M365 user with name -> %s", user_name)
- return 0
+ logger.debug(
+ "Did not find an existing M365 user with name -> '%s'", user_name
+ )
+ return None
- # end method definition
+ # end method definition
+
+ def determine_user_id_core_share(self, user: dict) -> str | None:
+     """Determine the ID of a Core Share user - either from payload or from Core Share directly
+
+     Args:
+         user (dict): Payload dictionary of the user.
+
+     Returns:
+         str | None: Core Share User ID or None.
+     """
+
+     # Is the ID already known in payload? (if determined before)
+     if "core_share_id" in user:
+         return user["core_share_id"]
+
+     if not isinstance(self._core_share, CoreShare):
+         logger.error(
+             "Core Share connection not set up properly.",
+         )
+         return None
+
+     core_share_user_id = None
+
+     # Next try to lookup ID via the email address:
+     if "email" in user:
+         core_share_user = self._core_share.get_user_by_email(user["email"])
+         core_share_user_id = self._core_share.get_result_value(
+             response=core_share_user, key="id"
+         )
+
+     # Last resort is to lookup the ID via firstname + lastname.
+     # This is handy in case the email has changed:
+     if not core_share_user_id and "lastname" in user and "firstname" in user:
+         core_share_user = self._core_share.get_user_by_name(
+             first_name=user["firstname"], last_name=user["lastname"]
+         )
+         core_share_user_id = self._core_share.get_result_value(
+             response=core_share_user, key="id"
+         )
+
+     # Have we found the user?
+     if core_share_user_id:
+         # Write ID back into the payload:
+         user["core_share_id"] = core_share_user_id
+         return user["core_share_id"]
+     else:
+         logger.debug(
+             "Did not find an existing Core Share user with name -> '%s %s'",
+             user["firstname"],
+             user["lastname"],
+         )
+         return None
+
+ # end method definition
def determine_workspace_id(self, workspace: dict) -> int:
"""Determine the nodeID of a workspace - either from payload or from OTCS
@@ -1053,17 +1803,107 @@
workspace["nodeId"] = workspace_id
return workspace_id
else:
- logger.info(
- "Workspace of type -> %s and name -> %s does not yet exist.",
+ logger.debug(
+ "Workspace of type -> '%s' and name -> '%s' does not yet exist.",
workspace["type_name"],
workspace["name"],
)
return 0
- # end method definition
+ # end method definition
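+
+ # Illustrative effect of the determine_*_id() helpers on a payload
+ # element (user data is hypothetical): a dict like
+ #
+ #   {"name": "pgrant", "firstname": "Paul", "lastname": "Grant"}
+ #
+ # is enriched step by step with "id", "m365_id", and "core_share_id"
+ # items as the lookups succeed, so repeated calls can short-circuit on
+ # the cached IDs instead of querying the backends again.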
+
+ def determine_workspace_type_and_template_id(
+     self,
+     workspace_type_name: str,
+     workspace_template_name: str = "",
+ ) -> tuple[int | None, int | None]:
+     """Determine the IDs of type and template based on the provided names.
+     This depends on the self._workspace_types list to be up to date (see process_workspace_types())
+
+     Args:
+         workspace_type_name (str): Name of the workspace type
+         workspace_template_name (str, optional): Name of the workspace template. Defaults to "".
+
+     Returns:
+         tuple[int | None, int | None]: IDs of the workspace type (first) and workspace template (second)
+     """
+
+     # Check if the customizer has initialized the workspace type list
+     if not self._workspace_types:
+         logger.error(
+             "Workspace type list is not initialized! This should never happen!",
+         )
+         return (None, None)
+
+     # Find the workspace type with the name given in the payload:
+     workspace_type = next(
+         (
+             item
+             for item in self._workspace_types
+             if item["name"] == workspace_type_name
+         ),
+         None,
+     )
+     if workspace_type is None:
+         logger.error(
+             "Workspace Type -> '%s' not found!",
+             workspace_type_name,
+         )
+         return (None, None)
+
+     workspace_type_id = workspace_type["id"]
+
+     if workspace_type["templates"] == []:
+         logger.warning(
+             "Workspace Type -> '%s' does not have templates!",
+             workspace_type_name,
+         )
+         return (workspace_type_id, None)
+
+     if workspace_template_name:
+         workspace_template = next(
+             (
+                 item
+                 for item in workspace_type["templates"]
+                 if item["name"] == workspace_template_name
+             ),
+             None,
+         )
+         if workspace_template:  # does this template exist?
+             logger.info(
+                 "Workspace Template -> '%s' has been specified in payload and it does exist.",
+                 workspace_template_name,
+             )
+         else:
+             logger.error(
+                 "Workspace Template -> '%s' has been specified in payload but it doesn't exist!",
+                 workspace_template_name,
+             )
+             logger.error(
+                 "Workspace Type -> '%s' has only these templates -> %s",
+                 workspace_type_name,
+                 workspace_type["templates"],
+             )
+             return (workspace_type_id, None)
+
+     # template to be used is NOT specified in the payload - then we just take the first one:
+     else:
+         workspace_template = workspace_type["templates"][0]
+         logger.info(
+             "Workspace Template has not been specified in payload - we just take the first one (%s)",
+             workspace_template,
+         )
+
+     workspace_template_id = workspace_template["id"]
+
+     return (workspace_type_id, workspace_template_id)
+
+ # end method definition
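+
+ # Illustrative call (workspace type and template names are hypothetical):
+ #
+ #   (type_id, template_id) = self.determine_workspace_type_and_template_id(
+ #       workspace_type_name="Customer",
+ #       workspace_template_name="Customer Standard",
+ #   )
+ #   if type_id is None or template_id is None:
+ #       ...  # handle the failed lookup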
def add_transport_extractions(self, extractions: list) -> int:
- """_summary_
+ """Calculate the number of extractions and save them in
+ a global list self._transport_extractions.
Args:
extractions (list): list of extractions from a single transport package
@@ -1081,7 +1921,7 @@
return counter
- # end method definition
+ # end method definition
def process_payload(self):
"""Main method to process a payload file.
@@ -1130,13 +1970,19 @@
# if the group already exists in Extended ECM. This is important especially
# if the customizer pod is restarted / run multiple times:
self.process_group_placeholders()
+ if self._core_share and isinstance(self._core_share, CoreShare):
+ self._log_header_callback("Process Core Share Groups")
+ self.process_groups_core_share()
if self._m365 and isinstance(self._m365, M365):
- self._log_header_callback("Cleanup existing MS Teams")
+ self._log_header_callback("Cleanup existing M365 Teams")
self.cleanup_all_teams_m365()
self._log_header_callback("Process M365 Groups")
self.process_groups_m365()
case "users":
self._log_header_callback("Process OTCS Users")
+ self._user_customization = bool(
+ payload_section.get("user_customization", "True")
+ )
self.process_users()
# Add all users with their IDs to a lookup dict for placeholder replacements
# in adminSettings. This also updates the payload with user IDs from OTCS
@@ -1167,8 +2013,11 @@
)
else:
logger.info("Processing of OTIV licenses is disabled.")
- self._log_header_callback("Process User Settings")
+ self._log_header_callback("Process OTDS User Settings")
self.process_user_settings()
+ if self._core_share and isinstance(self._core_share, CoreShare):
+ self._log_header_callback("Process Core Share Users")
+ self.process_users_core_share()
if self._m365 and isinstance(self._m365, M365):
self._log_header_callback("Process M365 Users")
self.process_users_m365()
@@ -1177,7 +2026,7 @@
self._log_header_callback("Process M365 Teams")
self.process_teams_m365()
case "adminSettings":
- self._log_header_callback("Process Administration Settings")
+ self._log_header_callback("Process OTCS Administration Settings")
restart_required = self.process_admin_settings(
admin_settings=self._admin_settings
)
@@ -1188,7 +2037,9 @@
# Restart OTCS frontend and backend pods:
self._otcs_restart_callback(self._otcs_backend)
case "adminSettingsPost":
- self._log_header_callback("Process Administration Settings (post)")
+ self._log_header_callback(
+ "Process OTCS Administration Settings (post)"
+ )
restart_required = self.process_admin_settings(
self._admin_settings_post, "adminSettingsPost"
)
@@ -1213,6 +2064,22 @@
case "externalSystems":
self._log_header_callback("Process External System Connections")
self.process_external_systems()
+ # Now the SAP, SuccessFactors and Salesforce objects
+ # should be initialized and we can process users and groups
+ # in these external systems:
+ if self._sap and isinstance(self._sap, SAP):
+ self._log_header_callback("Process SAP Users")
+ self.process_users_sap()
+ if self._successfactors and isinstance(
+ self._successfactors, SuccessFactors
+ ):
+ self._log_header_callback("Process SuccessFactors Users")
+ self.process_users_successfactors()
+ if self._salesforce and isinstance(self._salesforce, Salesforce):
+ self._log_header_callback("Process Salesforce Groups")
+ self.process_groups_salesforce()
+ self._log_header_callback("Process Salesforce Users")
+ self.process_users_salesforce()
case "transportPackages":
self._log_header_callback("Process Transport Packages")
self.process_transport_packages(self._transport_packages)
@@ -1226,7 +2093,7 @@
if self._m365 and isinstance(self._m365, M365):
# Right after the transport that creates the top level folders
# we can add the M365 Teams apps for Extended ECM as its own tab:
- self._log_header_callback("Process M365 Teams apps")
+ self._log_header_callback("Process M365 Teams Apps")
self.process_teams_m365_apps() case "contentTransportPackages": self._log_header_callback("Process Content Transport Packages") @@ -1247,7 +2114,7 @@ def process_payload(self): # If a payload file (e.g. additional ones) does not have # transportPackages then it can happen that the # self._workspace_types is not yet initialized. As we need - # this structure for workspaceTemnplates we initialize it here: + # this structure for workspaceTemplates we initialize it here: if not self._business_object_types: self._log_header_callback("Process Business Object Types") self.process_business_object_types() @@ -1283,45 +2150,31 @@ def process_payload(self): if self._aviator_enabled: self._log_header_callback("Process Workspace Aviators") self.process_workspace_aviators() + case "bulkDatasources": + # this is here just to avoid an error in catch all below + # the bulkDatasources dictionary will be processed in + # the other bulk* sections + pass + case "bulkWorkspaces": + if not self._workspace_types: + self._log_header_callback("Process Workspace Types") + self.process_workspace_types() + self._log_header_callback("Process Bulk Workspaces") + self.process_bulk_workspaces() + case "bulkWorkspaceRelationships": + self._log_header_callback("Process Bulk Workspace Relationships") + self.process_bulk_workspace_relationships() + case "bulkDocuments": + self._log_header_callback("Process Bulk Documents") + self.process_bulk_documents() case "sapRFCs": - self._log_header_callback("Process SAP RFCs") - - sap_external_system = {} - if self._external_systems: - sap_external_system = next( - ( - item - for item in self._external_systems - if item.get("external_system_type") - and item["external_system_type"] == "SAP" - ), - {}, - ) - if not sap_external_system: - logger.warning( - "SAP RFC in payload but SAP external system is configured. RFCs will not be processed." - ) - elif not sap_external_system.get("enabled"): - logger.warning( - "SAP RFC in payload but SAP external system is disabled. RFCs will not be processed." - ) - # if the external system is not marked reachable we check it once more as this could be fooled - # by customizer pod restarts - elif not sap_external_system.get( - "reachable" - ) and not self.check_external_system(sap_external_system): + if self._sap and isinstance(self._sap, SAP): + self._log_header_callback("Process SAP RFCs") + self.process_sap_rfcs(self._sap) + else: logger.warning( - "SAP RFC in payload but SAP external system is not reachable. RFCs will not be processed." + "SAP RFC in payload but SAP external system is not configured or not enabled. RFCs will not be processed." ) - else: - if self._sap: - self.process_sap_rfcs(self._sap) - self._log_header_callback("Process SAP Users") - self.process_users_sap(self._sap) - else: - logger.error( - "SAP object is not yet initialized. Something is wrong with payload section ordering." 
- )
case "webReports":
self._log_header_callback("Process Web Reports")
restart_required = self.process_web_reports(
@@ -1410,37 +2263,45 @@
browser_automations=self._browser_automations_post,
section_name="browserAutomationsPost",
)
+ case "workspaceTypes":
+ pass
case _:
logger.error(
- "Illegal payload section name -> %s in payloadSections!",
+ "Illegal payload section name -> '%s' in payloadSections!",
payload_section["name"],
)
payload_section_restart = payload_section.get("restart", False)
if payload_section_restart:
logger.info(
- "Payload section -> %s requests a restart of OTCS services...",
+ "Payload section -> '%s' requests a restart of OTCS services...",
payload_section["name"],
)
# Restart OTCS frontend and backend pods:
self._otcs_restart_callback(self._otcs_backend)
else:
logger.info(
- "Payload section -> %s does not require a restart of OTCS services",
+ "Payload section -> '%s' does not require a restart of OTCS services",
payload_section["name"],
)
- if self._users:
+ if self._users and self._user_customization:
self._log_header_callback("Process User Profile Photos")
self.process_user_photos()
if self._m365 and isinstance(self._m365, M365):
self._log_header_callback("Process M365 User Profile Photos")
self.process_user_photos_m365()
+ if self._salesforce and isinstance(self._salesforce, Salesforce):
+ self._log_header_callback("Process Salesforce User Profile Photos")
+ self.process_user_photos_salesforce()
+ if self._core_share and isinstance(self._core_share, CoreShare):
+ self._log_header_callback("Process Core Share User Profile Photos")
+ self.process_user_photos_core_share()
self._log_header_callback("Process User Favorites and Profiles")
self.process_user_favorites_and_profiles()
self._log_header_callback("Process User Security")
self.process_user_security()
- # end method definition
+ # end method definition
def process_web_hooks(self, webhooks: list, section_name: str = "webHooks") -> bool:
"""Process Web Hooks in payload and do HTTP requests.
@@ -1484,7 +2345,9 @@
success = False
continue
elif not enabled:
- logger.info("Payload for Web Hook -> %s is disabled. Skipping...", url)
+ logger.info(
+ "Payload for Web Hook -> '%s' is disabled. Skipping...", url
+ )
continue
description = webhook.get("description")
@@ -1515,7 +2378,7 @@
return success
- # end method definition
+ # end method definition
def process_partitions(self, section_name: str = "partitions") -> bool:
"""Process OTDS partitions in payload and create them in OTDS.
@@ -1531,7 +2394,7 @@ def process_partitions(self, section_name: str = "partitions") -> bool:
"""
if not self._partitions:
- logger.info("Payload section -> %s is empty. Skipping...", section_name)
+ logger.info("Payload section -> '%s' is empty. Skipping...", section_name)
return True
# If this payload section has been processed successfully before we
@@ -1552,7 +2415,7 @@
# In this case we skip the element:
if "enabled" in partition and not partition["enabled"]:
logger.info(
- "Payload for Partition -> %s is disabled. Skipping...",
+ "Payload for Partition -> '%s' is disabled. Skipping...",
Skipping...", partition_name, ) continue @@ -1562,23 +2425,23 @@ def process_partitions(self, section_name: str = "partitions") -> bool: # Check if Partition does already exist # (in an attempt to make the code idem-potent) logger.info( - "Check if OTDS partition -> %s does already exist...", partition_name + "Check if OTDS partition -> '%s' does already exist...", partition_name ) response = self._otds.get_partition(partition_name, show_error=False) if response: logger.info( - "Partition -> %s does already exist. Skipping...", partition_name + "Partition -> '%s' does already exist. Skipping...", partition_name ) continue # Only continue if Partition does not exist already - logger.info("Partition -> %s does not exist. Creating...", partition_name) + logger.info("Partition -> '%s' does not exist. Creating...", partition_name) response = self._otds.add_partition(partition_name, partition_description) if response: - logger.info("Added OTDS partition -> %s", partition_name) + logger.info("Added OTDS partition -> '%s'", partition_name) else: - logger.error("Failed to add OTDS partition -> %s", partition_name) + logger.error("Failed to add OTDS partition -> '%s'", partition_name) success = False continue @@ -1589,13 +2452,13 @@ def process_partitions(self, section_name: str = "partitions") -> bool: ) if response: logger.info( - "Added OTDS partition -> %s to access role -> %s", + "Added OTDS partition -> '%s' to access role -> '%s'", partition_name, access_role, ) else: logger.error( - "Failed to add OTDS partition -> %s to access role -> %s", + "Failed to add OTDS partition -> '%s' to access role -> '%s'", partition_name, access_role, ) @@ -1610,7 +2473,9 @@ def process_partitions(self, section_name: str = "partitions") -> bool: otcs_resource_name = self._otcs.config()["resource"] otcs_resource = self._otds.get_resource(otcs_resource_name) if not otcs_resource: - logger.error("Cannot find OTCS resource -> %s", otcs_resource_name) + logger.error( + "Cannot find OTCS resource -> '%s'", otcs_resource_name + ) success = False continue otcs_resource_id = otcs_resource["resourceID"] @@ -1625,7 +2490,7 @@ def process_partitions(self, section_name: str = "partitions") -> bool: if not assigned_license: logger.error( - "Failed to assign partition -> %s to license feature -> %s of license -> %s!", + "Failed to assign partition -> '%s' to license feature -> '%s' of license -> '%s'!", partition_name, license_feature, license_name, @@ -1633,7 +2498,7 @@ def process_partitions(self, section_name: str = "partitions") -> bool: success = False else: logger.info( - "Successfully assigned partition -> %s to license feature -> %s of license -> %s", + "Successfully assigned partition -> '%s' to license feature -> '%s' of license -> '%s'", partition_name, license_feature, license_name, @@ -1643,7 +2508,7 @@ def process_partitions(self, section_name: str = "partitions") -> bool: return success - # end method definition + # end method definition def process_partition_licenses( self, section_name: str = "partitionLicenses" @@ -1662,7 +2527,7 @@ def process_partition_licenses( """ if not self._partitions: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + logger.info("Payload section -> '%s' is empty. 
Skipping...", section_name) return True # If this payload section has been processed successfully before we @@ -1683,7 +2548,7 @@ def process_partition_licenses( # In this case we skip the element: if "enabled" in partition and not partition["enabled"]: logger.info( - "Payload for Partition -> %s is disabled. Skipping...", + "Payload for Partition -> '%s' is disabled. Skipping...", partition_name, ) continue @@ -1691,7 +2556,7 @@ def process_partition_licenses( response = self._otds.get_partition(partition_name, show_error=True) if not response: logger.error( - "Partition -> %s does not exist. Skipping...", partition_name + "Partition -> '%s' does not exist. Skipping...", partition_name ) success = False continue @@ -1704,7 +2569,9 @@ def process_partition_licenses( otcs_resource_name = self._otcs.config()["resource"] otcs_resource = self._otds.get_resource(otcs_resource_name) if not otcs_resource: - logger.error("Cannot find OTCS resource -> %s", otcs_resource_name) + logger.error( + "Cannot find OTCS resource -> '%s'", otcs_resource_name + ) success = False continue otcs_resource_id = otcs_resource["resourceID"] @@ -1717,7 +2584,7 @@ def process_partition_licenses( license_name=license_name, ): logger.info( - "Partition -> %s is already licensed for -> %s (%s)", + "Partition -> '%s' is already licensed for -> '%s' ('%s')", partition_name, license_name, license_feature, @@ -1732,7 +2599,7 @@ def process_partition_licenses( if not assigned_license: logger.error( - "Failed to assign partition -> %s to license feature -> %s of license -> %s!", + "Failed to assign partition -> '%s' to license feature -> '%s' of license -> '%s'!", partition_name, license_feature, license_name, @@ -1740,7 +2607,7 @@ def process_partition_licenses( success = False else: logger.info( - "Successfully assigned partition -> %s to license feature -> %s of license -> %s", + "Successfully assigned partition -> '%s' to license feature -> '%s' of license -> '%s'", partition_name, license_feature, license_name, @@ -1750,7 +2617,7 @@ def process_partition_licenses( return success - # end method definition + # end method definition def process_oauth_clients(self, section_name: str = "oauthClients") -> bool: """Process OTDS OAuth clients in payload and create them in OTDS. @@ -1787,36 +2654,36 @@ def process_oauth_clients(self, section_name: str = "oauthClients") -> bool: # In this case we skip the element: if "enabled" in oauth_client and not oauth_client["enabled"]: logger.info( - "Payload for OAuthClient -> %s is disabled. Skipping...", + "Payload for OAuthClient -> '%s' is disabled. 
Skipping...", client_name, ) continue - client_description = oauth_client.get("description") - client_confidential = oauth_client.get("confidential") - client_partition = oauth_client.get("partition") + client_description = oauth_client.get("description", "") + client_confidential = oauth_client.get("confidential", True) + client_partition = oauth_client.get("partition", "Global") if client_partition == "Global": client_partition = [] - client_redirect_urls = oauth_client.get("redirect_urls") + client_redirect_urls = oauth_client.get("redirect_urls", []) client_permission_scopes = oauth_client.get("permission_scopes") client_default_scopes = oauth_client.get("default_scopes") - client_allow_impersonation = oauth_client.get("allow_impersonation") + client_allow_impersonation = oauth_client.get("allow_impersonation", True) client_secret = oauth_client.get("secret", "") # Check if OAuth client does already exist # (in an attempt to make the code idem-potent) logger.info( - "Check if OTDS OAuth Client -> %s does already exist...", client_name + "Check if OTDS OAuth Client -> '%s' does already exist...", client_name ) response = self._otds.get_oauth_client(client_name, show_error=False) if response: logger.info( - "OAuth Client -> %s does already exist. Skipping...", client_name + "OAuth Client -> '%s' does already exist. Skipping...", client_name ) continue else: logger.info( - "OAuth Client -> %s does not exist. Creating...", client_name + "OAuth Client -> '%s' does not exist. Creating...", client_name ) response = self._otds.add_oauth_client( @@ -1831,9 +2698,9 @@ def process_oauth_clients(self, section_name: str = "oauthClients") -> bool: secret=client_secret, ) if response: - logger.info("Added OTDS OAuth client -> %s", client_name) + logger.info("Added OTDS OAuth client -> '%s'", client_name) else: - logger.error("Failed to add OTDS OAuth client -> %s", client_name) + logger.error("Failed to add OTDS OAuth client -> '%s'", client_name) success = False continue @@ -1841,7 +2708,9 @@ def process_oauth_clients(self, section_name: str = "oauthClients") -> bool: # the automatically created secret: client_secret = response.get("secret") if not client_secret: - logger.error("OAuth client -> %s does not have a secret!", client_name) + logger.error( + "OAuth client -> '%s' does not have a secret!", client_name + ) continue client_description += " Client Secret: " + str(client_secret) @@ -1865,17 +2734,17 @@ def process_auth_handlers(self, section_name: str = "authHandlers") -> bool: The payload section is a list of dicts with these items: { - enabled: True or False to enable or disable the payload item - name: Name of the authorization handler. This is shown in the first - column of the Auth Handler list in OTDS. - description: Description of the handler. This is shown in the second - column of the Auth Handler - type: type of the handler. Possible values are SALM, SAP, OAUTH - priority: a numeric value to order different handlers in OTDS by priority - active_by_default: Whether to activate this handler for any request to the - OTDS login page. If True, any login request to the OTDS - login page will be redirected to the IdP. If false, the - user has to select the provider on the login page. + enabled (bool): True or False to enable or disable the payload item + name (str): Name of the authorization handler. This is shown in the first + column of the Auth Handler list in OTDS. + description (str): Description of the handler. 
+ column of the Auth Handler list in OTDS.
+ type (str): Type of the handler. Possible values are SAML, SAP, OAUTH
+ priority (int): A numeric value to order different handlers in OTDS by priority
+ active_by_default (bool): Whether to activate this handler for any request to the
+ OTDS login page. If True, any login request to the OTDS
+ login page will be redirected to the IdP. If False, the
+ user has to select the provider on the login page.
provider_name: The name of the identity provider. This should be a single word
since it will be part of the metadata URL. This is what is
shown as a button on the OTDS login page.
@@ -1913,7 +2782,7 @@
"""
if not self._auth_handlers:
- logger.info("Payload section -> %s is empty. Skipping...", section_name)
+ logger.info("Payload section -> '%s' is empty. Skipping...", section_name)
return True
# If this payload section has been processed successfully before we
@@ -1934,7 +2803,9 @@
# Check if Auth Handler does already exist (e.g. after a restart of
# the customizer pod):
if self._otds.get_auth_handler(handler_name, show_error=False):
- logger.info("Auth handler -> %s does already exist. Skipping...")
+ logger.info(
+ "Auth handler -> '%s' does already exist. Skipping...", handler_name
+ )
continue
handler_description = auth_handler.get("description")
@@ -1943,7 +2814,7 @@
# In this case we skip the element:
if "enabled" in auth_handler and not auth_handler["enabled"]:
logger.info(
- "Payload for OTDS Authorization Handler -> %s is disabled. Skipping...",
+ "Payload for OTDS Authorization Handler -> '%s' is disabled. Skipping...",
handler_name,
)
continue
@@ -2009,7 +2880,7 @@
certificate_file = auth_handler.get("certificate_file")
if not certificate_file:
logger.error(
- "SAP Authorization handler -> %s (%s) requires a certificate file name. Skipping...",
+ "SAP Authorization handler -> '%s' (%s) requires a certificate file name. Skipping...",
handler_name,
handler_type,
)
@@ -2019,7 +2890,7 @@
if not certificate_password:
# This is not an error - we can have this key with an empty string!
logger.info(
- "SAP Authorization handler -> %s (%s) does not have a certificate password - this can be OK.",
+ "SAP Authorization handler -> '%s' (%s) does not have a certificate password - this can be OK.",
handler_name,
handler_type,
)
@@ -2035,7 +2906,7 @@
provider_name = auth_handler.get("provider_name")
if not provider_name:
logger.error(
- "OAUTH Authorization handler -> %s (%s) requires a provider name. Skipping...",
+ "OAUTH Authorization handler -> '%s' (%s) requires a provider name. Skipping...",
handler_name,
handler_type,
)
@@ -2044,7 +2915,7 @@
client_id = auth_handler.get("client_id")
if not client_id:
logger.error(
- "OAUTH Authorization handler -> %s (%s) requires a client ID. Skipping...",
+ "OAUTH Authorization handler -> '%s' (%s) requires a client ID. Skipping...",
Skipping...", handler_name, handler_type, ) @@ -2053,7 +2924,7 @@ def process_auth_handlers(self, section_name: str = "authHandlers") -> bool: client_secret = auth_handler.get("client_secret") if not client_secret: logger.error( - "OAUTH Authorization handler -> %s (%s) requires a client secret. Skipping...", + "OAUTH Authorization handler -> '%s' (%s) requires a client secret. Skipping...", handler_name, handler_type, ) @@ -2062,7 +2933,7 @@ def process_auth_handlers(self, section_name: str = "authHandlers") -> bool: authorization_endpoint = auth_handler.get("authorization_endpoint") if not authorization_endpoint: logger.error( - "OAUTH Authorization handler -> %s (%s) requires a authorization endpoint. Skipping...", + "OAUTH Authorization handler -> '%s' (%s) requires a authorization endpoint. Skipping...", handler_name, handler_type, ) @@ -2071,7 +2942,7 @@ def process_auth_handlers(self, section_name: str = "authHandlers") -> bool: token_endpoint = auth_handler.get("token_endpoint") if not token_endpoint: logger.warning( - "OAUTH Authorization handler -> %s (%s) does not have a token endpoint.", + "OAUTH Authorization handler -> '%s' (%s) does not have a token endpoint.", handler_name, handler_type, ) @@ -2097,13 +2968,13 @@ def process_auth_handlers(self, section_name: str = "authHandlers") -> bool: if response: logger.info( - "Successfully added OTDS authorization handler -> %s (%s)", + "Successfully added OTDS authorization handler -> '%s' (%s)", handler_name, handler_type, ) else: logger.error( - "Failed to add OTDS authorization handler -> %s (%s)", + "Failed to add OTDS authorization handler -> '%s' (%s)", handler_name, handler_type, ) @@ -2113,7 +2984,7 @@ def process_auth_handlers(self, section_name: str = "authHandlers") -> bool: return success - # end method definition + # end method definition def process_trusted_sites(self, section_name: str = "trustedSites") -> bool: """Process OTDS trusted sites in payload and create them in OTDS. @@ -2129,7 +3000,7 @@ def process_trusted_sites(self, section_name: str = "trustedSites") -> bool: """ if not self._trusted_sites: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True # If this payload section has been processed successfully before we @@ -2160,23 +3031,23 @@ def process_trusted_sites(self, section_name: str = "trustedSites") -> bool: and not trusted_site["enabled"] ): logger.info( - "Payload for OTDS Trusted Site -> %s is disabled. Skipping...", + "Payload for OTDS Trusted Site -> '%s' is disabled. Skipping...", url, ) continue response = self._otds.add_trusted_site(url) if response: - logger.info("Added OTDS trusted site -> %s", trusted_site) + logger.info("Added OTDS trusted site -> %s", url) else: - logger.error("Failed to add trusted site -> %s", trusted_site) + logger.error("Failed to add trusted site -> %s", url) success = False self.write_status_file(success, section_name, self._trusted_sites) return success - # end method definition + # end method definition def process_system_attributes(self, section_name: str = "systemAttributes") -> bool: """Process OTDS system attributes in payload and create them in OTDS. @@ -2192,7 +3063,7 @@ def process_system_attributes(self, section_name: str = "systemAttributes") -> b """ if not self._system_attributes: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + logger.info("Payload section -> '%s' is empty. 
Skipping...", section_name) return True # If this payload section has been processed successfully before we @@ -2212,7 +3083,7 @@ def process_system_attributes(self, section_name: str = "systemAttributes") -> b if "enabled" in system_attribute and not system_attribute["enabled"]: logger.info( - "Payload for OTDS System Attribute -> %s is disabled. Skipping...", + "Payload for OTDS System Attribute -> '%s' is disabled. Skipping...", attribute_name, ) continue @@ -2228,13 +3099,13 @@ def process_system_attributes(self, section_name: str = "systemAttributes") -> b ) if response: logger.info( - "Added OTDS system attribute -> %s with value -> %s", + "Added OTDS system attribute -> '%s' with value -> %s", attribute_name, attribute_value, ) else: logger.error( - "Failed to add OTDS system attribute -> %s with value -> %s", + "Failed to add OTDS system attribute -> '%s' with value -> %s", attribute_name, attribute_value, ) @@ -2244,7 +3115,7 @@ def process_system_attributes(self, section_name: str = "systemAttributes") -> b return success - # end method definition + # end method definition def process_group_placeholders(self): """For some adminSettings we may need to replace a placeholder (sourrounded by %%...%%) @@ -2264,7 +3135,7 @@ def process_group_placeholders(self): # In this case we skip the element: if "enabled" in group and not group["enabled"]: logger.info( - "Payload for Group -> %s is disabled. Skipping...", group_name + "Payload for Group -> '%s' is disabled. Skipping...", group_name ) continue @@ -2288,7 +3159,7 @@ def process_group_placeholders(self): "Placeholder values after group processing = %s", self._placeholder_values ) - # end method definition + # end method definition def process_user_placeholders(self): """For some adminSettings we may need to replace a placeholder (sourrounded by %%...%%) @@ -2308,7 +3179,7 @@ def process_user_placeholders(self): # In this case we skip the element: if "enabled" in user and not user["enabled"]: logger.info( - "Payload for User -> %s is disabled. Skipping...", user_name + "Payload for User -> '%s' is disabled. Skipping...", user_name ) continue @@ -2331,7 +3202,7 @@ def process_user_placeholders(self): "Placeholder values after user processing = %s", self._placeholder_values ) - # end method definition + # end method definition def process_groups(self, section_name: str = "groups") -> bool: """Process groups in payload and create them in Extended ECM. @@ -2350,7 +3221,7 @@ def process_groups(self, section_name: str = "groups") -> bool: """ if not self._groups: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True # If this payload section has been processed successfully before we @@ -2373,7 +3244,7 @@ def process_groups(self, section_name: str = "groups") -> bool: # In this case we skip the element: if "enabled" in group and not group["enabled"]: logger.info( - "Payload for Group -> %s is disabled. Skipping...", group_name + "Payload for Group -> '%s' is disabled. Skipping...", group_name ) continue @@ -2382,32 +3253,32 @@ def process_groups(self, section_name: str = "groups") -> bool: group_id = self.determine_group_id(group) if group_id: logger.info( - "Found existing group -> %s (%s). Skipping to next group...", + "Found existing group -> '%s' (%s). Skipping to next group...", group_name, group_id, ) continue - logger.info("Did not find an existing group - creating a new group...") - # Now we know it is a new group... 
new_group = self._otcs.add_group(group_name) if new_group: new_group_id = self._otcs.get_result_value(new_group, "id") - logger.debug("New group -> %s", new_group) + logger.info( + "New group -> '%s' with ID -> %s has been created...", + group_name, + new_group_id, + ) group["id"] = new_group_id else: - logger.error("Failed to create group -> %s", group_name) + logger.error("Failed to create group -> '%s'", group_name) success = False continue - logger.debug("Groups = %s", self._groups) - # Second run through groups: create all group memberships # (nested groups) based on the IDs created in first run: for group in self._groups: if not "id" in group: - logger.error("Group -> %s does not have an ID.", group["name"]) + logger.error("Group -> '%s' does not have an ID.", group["name"]) success = False continue parent_group_names = group["parent_groups"] @@ -2429,14 +3300,14 @@ def process_groups(self, section_name: str = "groups") -> bool: parent_group_id = self._otcs.get_result_value(parent_group, "id") if not parent_group_id: logger.error( - "Parent Group -> %s not found. Skipping...", + "Parent Group -> '%s' not found. Skipping...", parent_group_name, ) success = False continue elif not "id" in parent_group: logger.error( - "Parent Group -> %s does not have an ID. Cannot establish group nesting. Skipping...", + "Parent Group -> '%s' does not have an ID. Cannot establish group nesting. Skipping...", parent_group["name"], ) success = False @@ -2449,7 +3320,7 @@ def process_groups(self, section_name: str = "groups") -> bool: members = self._otcs.get_group_members(parent_group_id, 1) if self._otcs.exist_result_item(members, "id", group["id"]): logger.info( - "Group -> %s (%s) is already a member of parent group -> %s (%s). Skipping to next parent group...", + "Group -> '%s' (%s) is already a member of parent group -> '%s' (%s). Skipping to next parent group...", group["name"], group["id"], parent_group_name, @@ -2457,7 +3328,7 @@ def process_groups(self, section_name: str = "groups") -> bool: ) else: logger.info( - "Add group -> %s (%s) to parent group -> %s (%s)", + "Add group -> '%s' (%s) to parent group -> '%s' (%s)", group["name"], group["id"], parent_group_name, @@ -2469,7 +3340,7 @@ def process_groups(self, section_name: str = "groups") -> bool: return success - # end method definition + # end method definition def process_groups_m365(self, section_name: str = "groupsM365") -> bool: """Process groups in payload and create them in Microsoft 365. @@ -2483,15 +3354,16 @@ def process_groups_m365(self, section_name: str = "groupsM365") -> bool: Returns: bool: True if payload has been processed without errors, False otherwise """ + if not isinstance(self._m365, M365): logger.error( - "Office 365 connection not setup properly. Skipping payload section %s...", + "Microsoft 365 connection not setup properly. Skipping payload section '%s'...", section_name, ) return False if not self._groups: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True # If this payload section has been processed successfully before we @@ -2514,12 +3386,14 @@ def process_groups_m365(self, section_name: str = "groupsM365") -> bool: # In this case we skip the element: if "enabled" in group and not group["enabled"]: logger.info( - "Payload for Group -> %s is disabled. Skipping...", group_name + "Payload for Group -> '%s' is disabled. Skipping...", group_name ) continue + # M365 is disabled per default. 
There needs to be "enable_o365" in payload + # and it needs to be True: if not "enable_o365" in group or not group["enable_o365"]: logger.info( - "Office 365 is not enabled in payload for Group -> %s. Skipping...", + "Microsoft 365 is not enabled in payload for Group -> '%s'. Skipping...", group_name, ) continue @@ -2547,20 +3421,18 @@ def process_groups_m365(self, section_name: str = "groupsM365") -> bool: # Have we found an exact match? if existing_group is not None: logger.info( - "Found existing Microsoft 365 group -> %s (%s) - skip creation of group...", + "Found existing Microsoft 365 group -> '%s' (%s) - skip creation of group...", existing_group["displayName"], existing_group["id"], ) # Write M365 group ID back into the payload (for the success file) group["m365_id"] = existing_group["id"] continue - logger.info( - "Did not find an exact match for the group - creating a new Microsoft 365 group..." - ) - else: - logger.info( - "Did not find any matching group - creating a new Microsoft 365 group..." - ) + + logger.info( + "Creating a new Microsoft 365 group -> '%s'...", + group_name, + ) # Now we know it is a new group... new_group = self._m365.add_group(group_name) @@ -2568,7 +3440,7 @@ def process_groups_m365(self, section_name: str = "groupsM365") -> bool: # Store the Microsoft 365 group ID in payload: group["m365_id"] = new_group["id"] logger.info( - "New Microsoft 365 group -> %s with ID -> %s has been created", + "New Microsoft 365 group -> '%s' with ID -> %s has been created", group_name, group["m365_id"], ) @@ -2579,10 +3451,10 @@ def process_groups_m365(self, section_name: str = "groupsM365") -> bool: return success - # end method definition + # end method definition - def process_users(self, section_name: str = "users") -> bool: - """Process users in payload and create them in Extended ECM. + def process_groups_salesforce(self, section_name: str = "groupsSalesforce") -> bool: + """Process groups in payload and create them in Salesforce. Args: section_name (str, optional): name of the section. It can be overridden @@ -2592,37 +3464,313 @@ def process_users(self, section_name: str = "users") -> bool: files written to the Admin Personal Workspace Returns: bool: True if payload has been processed without errors, False otherwise - Side Effects: - the user items are modified by adding an "id" dict element that - includes the technical ID of the user in Extended ECM """ - if not self._users: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + if not isinstance(self._salesforce, Salesforce): + logger.error( + "Salesforce connection not setup properly. Skipping payload section '%s'...", + section_name, + ) + return False + + if not self._groups: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True # If this payload section has been processed successfully before we # can return True and skip processing it once more: - # if self.check_status_file(section_name): - # return True + if self.check_status_file(section_name): + return True success: bool = True - # Add all users in payload and establish membership in - # specified groups: - for user in self._users: - # Sanity checks: - if not "name" in user: - logger.error("User is missing a login. Skipping to next user...") + # First run through groups: create all groups in payload + # and store the IDs of the created groups: + for group in self._groups: + if not "name" in group: + logger.error("Group needs a name. 
Skipping...") success = False continue - user_name = user["name"] + group_name = group["name"] + + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in group and not group["enabled"]: + logger.info( + "Payload for Group -> '%s' is disabled. Skipping...", group_name + ) + continue + # Salesforce is disabled per default. There needs to be "enable_salesforce" in payload + # and it needs to be True: + if not "enable_salesforce" in group or not group["enable_salesforce"]: + logger.info( + "Salesforce is not enabled in payload for Group -> '%s'. Skipping...", + group_name, + ) + continue + + # Check if the group does already exist (e.g. if job is restarted) + existing_group_id = self._salesforce.get_group_id(group_name) + if existing_group_id: + logger.info( + "Found existing Salesforce group -> '%s' (%s). Skipping...", + group_name, + existing_group_id, + ) + # Write M365 group ID back into the payload (for the success file) + group["salesforce_id"] = existing_group_id + continue + + logger.info( + "Creating a new Salesforce group -> '%s'...", + group_name, + ) + + # Now we know it is a new group... + new_group = self._salesforce.add_group(group_name) + new_group_id = self._salesforce.get_result_value(new_group, "id") + if new_group_id: + # Store the Microsoft 365 group ID in payload: + group["salesforce_id"] = new_group_id + logger.info( + "New Salesforce group -> '%s' with ID -> %s has been created.", + group_name, + new_group_id, + ) + else: + logger.error( + "Failed to create Salesforce group -> %s!", + group_name, + ) + success = False + + # Second run through groups: create all group memberships + # (nested groups) based on the IDs created in first run: + for group in self._groups: + if not "salesforce_id" in group: + logger.info( + "Group -> %s does not have an Salesforce ID. Skipping...", + group["name"], + ) + # Not all groups may be enabled for Salesforce. This is not an error. + continue + group_id = group["salesforce_id"] + parent_group_names = group["parent_groups"] + for parent_group_name in parent_group_names: + # First, try to find parent group in payload by parent group name: + parent_group = next( + ( + item + for item in self._groups + if item["name"] == parent_group_name + ), + None, + ) + if not parent_group: + logger.error( + "Parent Group -> '%s' not found. Cannot establish group nesting. Skipping...", + parent_group["name"], + ) + success = False + continue + if not "salesforce_id" in parent_group: + logger.info( + "Parent Group -> '%s' does not have a Salesforce ID. Cannot establish group nesting. Parent group may not be enabled for Salesforce. Skipping...", + parent_group["name"], + ) + # We don't treat this as an error - there may be payload groups which are not enabled for Salesforce! + continue + + parent_group_id = parent_group["salesforce_id"] + + # retrieve all members of the parent group + # to check if the current group is already a member in the parent group: + members = self._salesforce.get_group_members(parent_group_id) + if self._salesforce.exist_result_item( + members, "UserOrGroupId", group_id + ): + logger.info( + "Salesforce Group -> '%s' (%s) is already a member of parent Salesforce group -> '%s' (%s). 
+ + def process_groups_core_share(self, section_name: str = "groupsCoreShare") -> bool: + """Process groups in payload and create them in Core Share. + + Args: + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections like "webHooksPost") + This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + """ + + if not isinstance(self._core_share, CoreShare): + logger.error( + "Core Share connection not setup properly. Skipping payload section '%s'...", + section_name, + ) + return False + + if not self._groups: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True + + success: bool = True + + # Create all groups specified in payload + # and store the IDs of the created Core Share groups: + for group in self._groups: + if not "name" in group: + logger.error("Group needs a name. Skipping...") + success = False + continue + group_name = group["name"] + + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in group and not group["enabled"]: + logger.info( + "Payload for Group -> '%s' is disabled. Skipping...", group_name + ) + continue + # Core Share is disabled per default. There needs to be "enable_core_share" in payload + # and it needs to be True: + if not "enable_core_share" in group or not group["enable_core_share"]: + logger.info( + "Group -> '%s' is not enabled for Core Share. Skipping...", + group_name, + ) + continue + + # Check if the group does already exist (e.g. if job is restarted) + core_share_group = self._core_share.get_group_by_name(name=group_name) + core_share_group_id = self._core_share.get_result_value( + core_share_group, "id" + ) + if core_share_group_id: + logger.info( + "Found existing Core Share group -> '%s' (%s). Just do cleanup of potential left-overs...", + group_name, + core_share_group_id, + ) + # Write Core Share group ID back into the payload (for the success file) + group["core_share_id"] = core_share_group_id + + # For existing groups we want to clean up possible left-overs from old deployments + logger.info( + "Cleanup existing file shares of Core Share group -> '%s' (%s)...", + group_name, + core_share_group_id, + ) + response = self._core_share.cleanup_group_shares( + group_id=core_share_group_id, + ) + if not response: + logger.error("Failed to cleanup group shares!") + + continue + + logger.info( + "Creating a new Core Share group -> '%s'...", + group_name, + ) + + # Now we know it is a new group... + new_group = self._core_share.add_group(group_name) + new_group_id = self._core_share.get_result_value(new_group, "id") + if new_group_id: + # Store the Core Share group ID in payload: + group["core_share_id"] = new_group_id + logger.info( + "New Core Share group -> '%s' with ID -> %s has been created.", + group_name, + new_group_id, + ) + else: + logger.error( + "Failed to create Core Share group -> '%s'!", + group_name, + ) + success = False + + # Core Share groups cannot be nested. So we are done here. + + self.write_status_file(success, section_name, self._groups) + + return success + + # end method definition
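All `process_*` methods above and below share the same idempotency skeleton around `check_status_file` / `write_status_file`: skip the section when it is empty, skip it when a status file from a previous customizer run exists, otherwise process every item and record the outcome. Distilled into one place (a hypothetical wrapper; the real methods inline this pattern):

```python
def process_section(self, section_name: str, items: list, process_item) -> bool:
    """Skeleton shared by the process_* methods."""
    if not items:
        logger.info("Payload section -> '%s' is empty. Skipping...", section_name)
        return True
    # Skip work that a previous customizer run already completed:
    if self.check_status_file(section_name):
        return True
    success = True
    for item in items:
        if not process_item(item):
            success = False  # keep going - one bad item must not abort the section
    self.write_status_file(success, section_name, items)
    return success
```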
+ + def process_users(self, section_name: str = "users") -> bool: + """Process users in payload and create them in Extended ECM. + + Args: + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections like "webHooksPost") + This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + Side Effects: + the user items are modified by adding an "id" dict element that + includes the technical ID of the user in Extended ECM + """ + + if not self._users: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + # if self.check_status_file(section_name): + # return True + + success: bool = True + + # Add all users in payload and establish membership in + # specified groups: + for user in self._users: + # Sanity checks: + if not "name" in user: + logger.error("User is missing a login. Skipping to next user...") + success = False + continue + user_name = user["name"] # Check if element has been disabled in payload (enabled = false). # In this case we skip the element: if "enabled" in user and not user["enabled"]: logger.info( - "Payload for User -> %s is disabled. Skipping...", user_name + "Payload for User -> '%s' is disabled. Skipping...", user_name ) continue @@ -2633,7 +3781,7 @@ def process_users(self, section_name: str = "users") -> bool: or user["password"] == "" ): logger.info( - "User -> %s no password defined in payload, generating random password...", + "User -> '%s' has no password defined in payload, generating random password...", user_name, ) user["password"] = self.generate_password( @@ -2653,7 +3801,7 @@ def process_users(self, section_name: str = "users") -> bool: # Sanity checks: if not "base_group" in user: logger.warning( - "User -> %s is missing a base group - setting to default group", + "User -> '%s' is missing a base group - setting to default group", user_name, ) user["base_group"] = "DefaultGroup" @@ -2664,12 +3812,15 @@ def process_users(self, section_name: str = "users") -> bool: user_id = self.determine_user_id(user) if user_id: logger.info( - "Found existing user -> %s (%s). Skipping to next user...", + "Found existing user -> '%s' (%s). Skipping to next user...", user_name, user_id, ) continue - logger.info("Did not find an existing user - creating a new user...") + logger.info( + "Did not find an existing user with name '%s' - creating a new user...", + user_name, + ) # Find the base group of the user. 
Assume 'Default Group' (= 1001) if not found: base_group = next( @@ -2697,7 +3848,7 @@ def process_users(self, section_name: str = "users") -> bool: if new_user is not None: new_user_id = self._otcs.get_result_value(new_user, "id") logger.info( - "New user -> %s with ID -> %s has been created", + "New user -> '%s' with ID -> %s has been created", user_name, new_user_id, ) @@ -2722,7 +3873,7 @@ def process_users(self, section_name: str = "users") -> bool: group_id = self._otcs.get_result_value(group, "id") if group_id is None: logger.error( - "Group -> %s not found. Skipping...", user_group + "Group -> '%s' not found. Skipping...", user_group ) success = False continue @@ -2730,7 +3881,7 @@ def process_users(self, section_name: str = "users") -> bool: if group_id is None: logger.error( - "Group -> %s does not have an ID. Cannot add user -> %s to this group. Skipping...", + "Group -> '%s' does not have an ID. Cannot add user -> '%s' to this group. Skipping...", group_name, user["name"], ) @@ -2738,7 +3889,7 @@ def process_users(self, section_name: str = "users") -> bool: continue logger.info( - "Add user -> %s (%s) to group -> %s (%s)", + "Add user -> '%s' (%s) to group -> '%s' (%s)", user["name"], user["id"], group_name, @@ -2769,7 +3920,7 @@ def process_users(self, section_name: str = "users") -> bool: success = False continue logger.info( - "Set user attribute -> %s to -> %s", + "Set user attribute -> '%s' to -> %s", attribute_name, attribute_value, ) @@ -2789,11 +3940,9 @@ def process_users(self, section_name: str = "users") -> bool: return success - # end method definition + # end method definition - def process_users_sap( - self, sap_object: SAP, section_name: str = "usersSAP" - ) -> bool: + def process_users_sap(self, section_name: str = "usersSAP") -> bool: """Process users in payload and sync them with SAP (passwords for now). Args: @@ -2811,9 +3960,13 @@ def process_users_sap( """ if not self._users: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True + if not self._sap: + logger.error("SAP connection is not initialized. Bailing out...") + return False + # If this payload section has been processed successfully before we # can return True and skip processing it once more: if self.check_status_file(section_name): @@ -2825,8 +3978,7 @@ def process_users_sap( rfc_description = "RFC to update the SAP user password" rfc_call_options = () - # Add all users in payload and establish membership in - # specified groups: + # Update SAP password for all users in payload: for user in self._users: # Sanity checks: if not "name" in user: @@ -2839,19 +3991,21 @@ def process_users_sap( # In this case we skip the element: if "enabled" in user and not user["enabled"]: logger.info( - "Payload for User -> %s is disabled. Skipping...", user_name + "Payload for User -> '%s' is disabled. Skipping...", user_name ) continue # Check if the user is enabled for SAP: if not "enable_sap" in user or not user["enable_sap"]: - logger.info("User -> %s is not enabled for SAP. Skipping...", user_name) + logger.info( + "User -> '%s' is not enabled for SAP. Skipping...", user_name + ) continue # Sanity checks: if not "password" in user: logger.error( - "User -> %s is missing a password. Cannot sync with SAP. Skipping to next user...", + "User -> '%s' is missing a password. Cannot sync with SAP. 
Skipping to next user...", user_name, ) success = False @@ -2864,24 +4018,24 @@ def process_users_sap( } logger.info( - "Updating password of user -> %s in SAP. Calling SAP RFC -> %s (%s) with parameters -> %s ...", + "Updating password of user -> '%s' in SAP. Calling SAP RFC -> '%s' (%s) with parameters -> %s ...", user_name, rfc_name, rfc_description, rfc_params, ) - result = sap_object.call(rfc_name, rfc_call_options, rfc_params) + result = self._sap.call(rfc_name, rfc_call_options, rfc_params) if result is None: logger.error( - "Failed to call SAP RFC -> %s to update password of user -> %s", + "Failed to call SAP RFC -> '%s' to update password of user -> %s", rfc_name, user_name, ) success = False elif result.get("RESULT") != "OK": logger.error( - "Result of SAP RFC -> %s is not OK, it returned -> %s failed items in result -> %s", + "Result of SAP RFC -> '%s' is not OK, it returned -> %s failed items in result -> %s", rfc_name, str(result.get("FAILED")), str(result), @@ -2891,7 +4045,9 @@ def process_users_sap( user["sap_sync_result"] = result else: logger.info( - "Successfully called RFC -> %s. Result -> %s", rfc_name, str(result) + "Successfully called RFC -> '%s'. Result -> %s", + rfc_name, + str(result), ) # Save result for status file content user["sap_sync_result"] = result @@ -2900,10 +4056,13 @@ def process_users_sap( return success - # end method definition + # end method definition - def process_users_m365(self, section_name: str = "usersM365") -> bool: - """Process users in payload and create them in Microsoft 365 via MS Graph API. + def process_users_successfactors( + self, + section_name: str = "usersSuccessFactors", + ) -> bool: + """Process users in payload and sync them with SuccessFactors (passwords and email). Args: section_name (str, optional): name of the section. It can be overridden @@ -2913,19 +4072,19 @@ def process_users_m365(self, section_name: str = "usersM365") -> bool: files written to the Admin Personal Workspace Returns: bool: True if payload has been processed without errors, False otherwise + Side Effects: + the user items are modified by adding an "successfactors_user_id" dict element that + includes the personIdExternal of the user in SuccessFactors """ - if not isinstance(self._m365, M365): - logger.error( - "Office 365 connection not setup properly. Skipping payload section -> %s...", - section_name, - ) - return False - if not self._users: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True + if not self._successfactors: + logger.error("SuccessFactors connection is not initialized. Bailing out...") + return False + # If this payload section has been processed successfully before we # can return True and skip processing it once more: if self.check_status_file(section_name): @@ -2933,8 +4092,7 @@ def process_users_m365(self, section_name: str = "usersM365") -> bool: success: bool = True - # Add all users in payload and establish membership in - # specified groups: + # traverse all users in payload: for user in self._users: # Sanity checks: if not "name" in user: @@ -2947,331 +4105,108 @@ def process_users_m365(self, section_name: str = "usersM365") -> bool: # In this case we skip the element: if "enabled" in user and not user["enabled"]: logger.info( - "Payload for User -> %s is disabled. Skipping...", user_name + "Payload for User -> '%s' is disabled. 
Skipping...", user_name ) continue - if not "enable_o365" in user or not user["enable_o365"]: + + # Check if the user is enabled for SuccessFactors: + if not "enable_successfactors" in user or not user["enable_successfactors"]: logger.info( - "Microsoft 365 is not enabled in payload for User -> %s. Skipping...", + "User -> '%s' is not enabled for SuccessFactors. Skipping...", user_name, ) continue - # Sanity checks: - if not "password" in user: + # Lookup password and email in payload: + user_password = user.get("password", "") + user_email = user.get("email", "") + + # first we need to get the SuccessFactors user account object + # to determine the personIdExternal that we need to update the + # SuccessFactors user. + response = self._successfactors.get_user_account(username=user_name) + user_id = self._successfactors.get_result_value( + response, "personIdExternal" + ) + if user_id is None: logger.error( - "User -> %s is missing a password. Skipping to next user...", + "Failed to get personIDExternal of SuccessFactors user -> %s", user_name, ) success = False continue - user_password = user["password"] - # be careful with the following fields - they could be empty - user_department = user.get("base_group", "") - user_first_name = user.get("firstname", "") - user_last_name = user.get("lastname", "") - user_location = user.get("location", "US") - user_email = user.get("email", user_name) + else: + logger.info( + "SuccessFactors User -> '%s' has External User ID -> %s", + user_name, + str(user_id), + ) - # Check if the user does already exist in M365 (e.g. if job is restarted) - m365_user_id = self.determine_user_id_m365(user) - if not m365_user_id: + # Now let's update the user password and email address: + update_data = {} + if user_password: logger.info( - "Did not find existing Micosoft 365 user - creating user %s...", - user_email, + "Updating password of SuccessFactors user -> '%s' (%s)...", + user_name, + str(user_id), ) + update_data["password"] = user_password + if user_email: + update_data["email"] = user_email - # Now we know it is a new user... - new_user = self._m365.add_user( - email=user_email, - password=user_password, - first_name=user_first_name, - last_name=user_last_name, - location=user_location, - department=user_department, + response = self._successfactors.update_user( + user_id=user_id, update_data=update_data + ) + if response: + logger.info( + "Successfully updated SuccessFactors user -> '%s'.", str(user_name) ) - if new_user is not None: - # Store the Microsoft 365 user ID in payload: - user["m365_id"] = new_user["id"] - logger.info( - "New Microsoft 365 user -> %s with ID -> %s has been created", - user_name, - user["m365_id"], - ) - else: - logger.error( - "Failed to create new Microsoft 365 user -> %s. Skipping...", - user_name, - ) - success = False - continue + # Save result for status file content + user["successfactors_user_id"] = user_id + else: + logger.error( + "Failed to update SuccessFactors user -> '%s'. Skipping...", + user_name, + ) + success = False + continue - # Now we assign a license to the new M365 user. 
- # First we see if there's a M365 SKU list in user - # payload - if not we wrap the default SKU configured - # for the m365 object into a single item list: - existing_user_licenses = self._m365.get_user_licenses(user["m365_id"]) - sku_list = user.get("m365_skus", [self._m365.config()["skuId"]]) - for sku_id in sku_list: - # Check if the M365 user already has this license: - if not self._m365.exist_result_item( - existing_user_licenses, "skuId", sku_id - ): - response = self._m365.assign_license_to_user( - user["m365_id"], sku_id - ) - if not response: - logger.error( - "Failed to assign license -> %s to Microsoft 365 user -> %s", - sku_id, - user_name, - ) - success = False - else: - if ( - not "m365_skus" in user - ): # this is only True if the default license from the m365 object is taken - user["m365_skus"] = [sku_id] - logger.info( - "License -> %s has been assigned to Microsoft 365 user -> %s", - sku_id, - user_name, - ) - else: - logger.info( - "Microsoft 365 user -> %s already has the license -> %s", - user_name, - sku_id, - ) + if not user_email: + continue - # Now we assign the Extended ECM Teams App to the new M365 user. - # First we check if the app is already assigned to the user. - # If not we install / assign the app. If the user already has - # the Extended ECM app we try to uprade it: - app_name = self._m365.config()["teamsAppName"] - response = self._m365.get_teams_apps_of_user( - user["m365_id"], - f"contains(teamsAppDefinition/displayName, '{app_name}')", + logger.info( + "Updating email of SuccessFactors user -> '%s' to -> %s...", + user_name, + user_email, ) - if self._m365.exist_result_item( - response, "displayName", app_name, sub_dict_name="teamsAppDefinition" - ): + response = self._successfactors.update_user_email( + user_id=user_id, email_address=user_email + ) + if response: logger.info( - "App -> %s is already installed for M365 user -> %s (%s). Trying to upgrade app...", - app_name, + "Successfully updated email address of SuccessFactors user -> '%s' to -> '%s'.", user_name, - user["m365_id"], - ) - response = self._m365.upgrade_teams_app_of_user( - user["m365_id"], app_name + user_email, ) else: - logger.info( - "Install M365 Teams app -> %s for M365 user -> %s", - app_name, - user_name, - ) - response = self._m365.assign_teams_app_to_user( - user["m365_id"], app_name - ) - if not response: - logger.error( - "Failed to install the App -> %s for M365 user -> %s", - app_name, - user_name, - ) - success = False - continue - - # Process Microsoft 365 group memberships of new user: - if "m365_id" in user: - user_id = user["m365_id"] - # don't forget the base group (department) ! - group_names = user["groups"] - if user_department: - group_names.append(user_department) - logger.info( - "User -> %s has these groups in payload -> %s (including base group -> %s). Checking if they are Microsoft 365 Groups...", + logger.error( + "Failed to update email address of SuccessFactors user -> '%s' to -> '%s'.", user_name, - group_names, - user_department, + user_email, ) - # Go through all group names: - for group_name in group_names: - # Find the group payload item to the parent group name: - group = next( - (item for item in self._groups if item["name"] == group_name), - None, - ) - if not group: - # if group is not in payload then this membership - # is not relevant for Microsoft 365. This could be system generated - # groups like "PageEdit" or "Business Administrators". 
- # In this case we do "continue" as we can't process parent groups - # either: - logger.info( - "No payload found for Group -> %s. Skipping...", group_name - ) - continue - elif not "enable_o365" in group or not group["enable_o365"]: - # If Microsoft 365 is not enabled for this group in - # the payload we don't create a M365 but we do NOT continue - # as there may still be parent groups that are M365 enabled - # we want to put the user in (see below): - logger.info( - "Payload Group -> %s is not enabled for M365.", group_name - ) - else: - response = self._m365.get_group(group_name) - if ( - response is None - or not "value" in response - or not response["value"] - ): - logger.error( - "Microsoft 365 Group -> %s not found. Skipping...", - group_name, - ) - success = False - else: - group_id = response["value"][0]["id"] - - # Check if user is already a member. We don't want - # to throw an error if the user is not found as a member - # so we pass show_error=False: - if self._m365.is_member( - group_id, user_id, show_error=False - ): - logger.info( - "Microsoft 365 user -> %s (%s) is already in Microsoft 365 group -> %s (%s)", - user["name"], - user_id, - group_name, - group_id, - ) - else: - logger.info( - "Add Microsoft 365 user -> %s (%s) to Microsoft 365 group -> %s (%s)", - user["name"], - user_id, - group_name, - group_id, - ) - response = self._m365.add_group_member( - group_id, user_id - ) - if not response: - logger.error( - "Failed to add Microsoft 365 user -> %s (%s) to Microsoft 365 group -> %s (%s)", - user["name"], - user_id, - group_name, - group_id, - ) - success = False - - # As each group should have at least one owner in M365 - # we set all users also as owners for now. Later we - # may want to configure this via payload: - logger.info( - "Make Microsoft 365 user -> %s (%s) owner of Microsoft 365 group -> %s (%s)", - user["name"], - user_id, - group_name, - group_id, - ) - response = self._m365.add_group_owner(group_id, user_id) - if not response: - logger.error( - "Failed to make Microsoft 365 user -> %s (%s) owner of Microsoft 365 group -> %s (%s)", - user["name"], - user_id, - group_name, - group_id, - ) - success = False - - # As M365 groups are flat (not nested) we also add the - # user as member to the parent groups of the current group - # if the parent group is enabled for M365: - parent_group_names = group.get("parent_groups") - logger.info( - "Group -> %s has the following parent groups -> %s", - group_name, - parent_group_names, - ) - for parent_group_name in parent_group_names: - # Find the group dictionary item to the parent group name: - parent_group = next( - ( - item - for item in self._groups - if item["name"] == parent_group_name - ), - None, - ) - if ( - parent_group is None - or not "enable_o365" in parent_group - or not parent_group["enable_o365"] - ): - # if parent group is not in payload then this membership - # is not relevant for Microsoft 365. - # If Office 365 is not enabled for this parent group in - # the payload we can also skip: - logger.info( - "Parent Group -> %s is not enabled for M365. Skipping...", - group_name, - ) - continue - - response = self._m365.get_group(parent_group_name) - if ( - response is None - or not "value" in response - or not response["value"] - ): - logger.error( - "Microsoft 365 Group -> %s not found. Skipping...", - group_name, - ) - success = False - continue - parent_group_id = response["value"][0]["id"] - - # Check if user is already a member. 
We don't want - # to throw an error if the user is not found as a member: - if self._m365.is_member( - parent_group_id, user_id, show_error=False - ): - logger.info( - "Microsoft 365 user -> %s (%s) is already in Microsoft 365 group -> %s (%s)", - user["name"], - user_id, - parent_group_name, - parent_group_id, - ) - continue - - logger.info( - "Add Microsoft 365 user -> %s (%s) to Microsoft 365 group -> %s (%s)", - user["name"], - user_id, - parent_group_name, - parent_group_id, - ) - self._m365.add_group_member(parent_group_id, user_id) + success = False self.write_status_file(success, section_name, self._users) return success - # end method definition + # end method definition - def process_teams_m365(self, section_name: str = "teamsM365") -> bool: - """Process groups in payload and create matching Teams in Microsoft 365. - We need to do this after the creation of the M365 users as wie require - Group Owners to create teams. + def process_users_salesforce( + self, + section_name: str = "usersSalesforce", + ) -> bool: + """Process users in payload and sync them with Salesforce (passwords and email). Args: section_name (str, optional): name of the section. It can be overridden @@ -3281,19 +4216,20 @@ def process_teams_m365(self, section_name: str = "teamsM365") -> bool: files written to the Admin Personal Workspace Returns: bool: True if payload has been processed without errors, False otherwise + Side Effects: + the user items are modified by adding "salesforce_user_id", "salesforce_user_login" + dict element that includes the ID of the user in Salesforce. This will be written + into the status file. """ - if not isinstance(self._m365, M365): - logger.error( - "Office 365 connection not setup properly. Skipping payload section -> %s...", - section_name, - ) - return False - - if not self._groups: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + if not self._users: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True + if not self._salesforce: + logger.error("Salesforce connection is not initialized. Bailing out...") + return False + # If this payload section has been processed successfully before we # can return True and skip processing it once more: if self.check_status_file(section_name): @@ -3301,91 +4237,320 @@ def process_teams_m365(self, section_name: str = "teamsM365") -> bool: success: bool = True - for group in self._groups: - if not "name" in group: - logger.error("Team needs a name. Skipping...") + # traverse all users in payload: + for user in self._users: + # Sanity checks: + if not "name" in user: + logger.error("User is missing a login. Skipping to next user...") success = False continue - group_name = group["name"] + user_name = user["name"] # Check if element has been disabled in payload (enabled = false). # In this case we skip the element: - if "enabled" in group and not group["enabled"]: + if "enabled" in user and not user["enabled"]: logger.info( - "Payload for Group -> %s is disabled. Skipping...", group_name + "Payload for User -> '%s' is disabled. Skipping...", user_name ) continue - if not "enable_o365" in group or not group["enable_o365"]: + + # Check if the user is enabled for Salesforce: + if not "enable_salesforce" in user or not user["enable_salesforce"]: logger.info( - "Office 365 is not enabled in payload for Group -> %s. Skipping...", - group_name, + "User -> '%s' is not enabled for Salesforce. 
Skipping...", + user_name, ) continue - # Check if the M365 group does not exist (this should actually never happen at this point) - if not "m365_id" in group: - # The "m365_id" value is set by the method process_groups_m365() - logger.error( - "No M365 Group exist for group -> %s (M365 Group creation may have failed). Skipping...", - group_name, + extra_attributes = user.get("extra_attributes", None) + if not extra_attributes or len(extra_attributes) == 0: + logger.info( + "User -> '%s' does not have the extra attributes for Salesforce. Cannot determine the Salesforce login for user. Skipping...", + user_name, ) - success = False continue - - if self._m365.has_team(group_name): + user_login = extra_attributes[0].get("value", "") + if not user_login: logger.info( - "M365 group -> %s already has an MS Team connected. Skipping...", - group_name, + "User -> '%s' does not have the extra attributes value for Salesforce. Skipping...", + user_name, ) continue - logger.info( - "Create M365 Team -> %s for existing M365 Group -> %s...", - group_name, - group_name, - ) - # Now "upgrading" this group to a MS Team: - new_team = self._m365.add_team(group_name) - if not new_team: - success = False - continue + user_email = user.get("email", "") + need_email_verification = False - self.write_status_file(success, section_name, self._groups) + # + # 1. Check if user does already exist in Salesforce: + # - return success + salesforce_user_id = self._salesforce.get_user_id(username=user_login) - # end method definition + # + # 2: Create or Update user in Salesforce: + # - def process_teams_m365_apps( - self, section_name: str = "teamsM365Apps", tab_name: str = "Extended ECM" - ) -> bool: - """Process groups in payload and configure Extended ECM Teams Apps - as a tab called "Extended ECM". - We need to do this after the transport as we need top level folders - we can point the Extended ECM teams app to. + if salesforce_user_id is None: + logger.info( + "Salesforce user -> '%s' does not exist. Creating a new Salesforce user...", + user_name, + ) + response = self._salesforce.add_user( + username=user_login, + email=user.get("email", ""), + firstname=user.get("firstname", ""), + lastname=user.get("lastname", ""), + department=user.get("base_group", ""), + title=user.get("title", ""), + company_name=user.get("company", "Innovate"), + ) + salesforce_user_id = self._salesforce.get_result_value(response, "id") + if not salesforce_user_id: + logger.error( + "Failed to create Salesforce user -> '%s'. Skipping...", + user_name, + ) + success = False + continue + else: + logger.info( + "Successfully created Salesforce user -> '%s' with ID -> %s", + user_name, + salesforce_user_id, + ) + # We need email verification for new users (unclear if this is really the case...) + need_email_verification = True + # The user does already exist in Salesforce and we just update it: + else: + update_data = { + "FirstName": user.get("firstname", ""), + "LastName": user.get("lastname", ""), + "Department": user.get("base_group", ""), + "Title": user.get("title", ""), + "CompanyName": user.get("company", ""), + } + logger.info( + "Salesforce user -> '%s' does already exist. Updating Salesforce user with -> %s...", + user_name, + str(update_data), + ) - Args: - section_name (str, optional): name of the section. It can be overridden - for cases where multiple sections of same type + # Check if the mail address has really changed. 
Otherwise we + # don't need to set it again and can avoid email verification: + salesforce_user = self._salesforce.get_user(user_id=salesforce_user_id) + salesforce_user_email = self._salesforce.get_result_value( + salesforce_user, "Email" + ) + if user_email != salesforce_user_email: + logger.info( + "Email for Salesforce user -> '%s' has changed from -> '%s' to -> %s", + user_name, + salesforce_user_email, + user_email, + ) + # Additional email payload for user update: + update_data["Email"] = user_email + # OK, email has changed - we need the email verification below... + need_email_verification = True + + # Update the existing Salesforce user with new / changed data: + response = self._salesforce.update_user( + user_id=salesforce_user_id, + update_data=update_data, + ) + if not response: + logger.error( + "Failed to update Salesforce user -> '%s'. Skipping...", + user_login, + ) + success = False + continue + else: + logger.info( + "Successfully updated Salesforce user -> '%s'.", user_login + ) + + # Save result for status file content + user["salesforce_user_id"] = salesforce_user_id + user["salesforce_user_login"] = user_login + + # + # 3: Update user password in Salesforce (we need to do this also for new users!): + # + + # Lookup password in payload: + user_password = user.get("password", "") + + if user_password: + response = self._salesforce.update_user_password( + user_id=salesforce_user_id, password=user_password + ) + if response: + logger.info( + "Successfully updated password of Salesforce user -> '%s' (%s).", + user_login, + salesforce_user_id, + ) + else: + logger.error( + "Failed to update Salesforce password for user -> '%s' (%s). Skipping...", + user_login, + salesforce_user_id, + ) + success = False + continue + + # + # 4: Handle Email verification: + # + + # We now need to wait for the verification mail from Salesforce, + # get it from the M365 Outlook inbox of the user (or the admin + # if the user does not have its own inbox) and click the + # verification link... + + if need_email_verification: + logger.info( + "Processing Email verification for user -> '%s' (%s). Wait a few seconds to make sure verification mail in user's inbox...", + user_name, + user_email, + ) + time.sleep(20) + + # Process verification mail sent by Salesforce. + # This has some hard-coded value. We may want to optimize it further in the future: + result = self._m365.email_verification( + user_email=user_email, + sender="QA_SUPPORT@salesforce.com", + subject="Finish changing your Salesforce", + url_search_pattern="setup/emailverif", + ) + if not result: + # Email verification was not successful + logger.warning( + "Salesforce email verification failed. No verification mail received in user's inbox." + ) + # don't treat as error nor do "continue" here - we still want to process the user groups... + else: + logger.info( + "Successfully verified new email address -> %s", user_email + ) + # end if need_email_verification + + # + # 5: Add users into groups in Salesforce: + # + + logger.info( + "Processing group memberships of Salesforce user -> '%s' (%s)...", + user_name, + user_email, + ) + user_groups = user.get("groups", []) + base_group = user.get("base_group", None) + if base_group and not base_group in user_groups: + user_groups.append(base_group) # list of groups the user is in + + for user_group in user_groups: + # "Business Administrators" is a OTCS generated group that we won't find + # in payload - skip this group. 
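The group lookups in this membership loop (and in the nesting passes above) scan `self._groups` with `next(...)` once per membership, which is O(groups × memberships). For large payloads, a one-time index gives the same result in constant time per lookup; a sketch (hypothetical, not in the module):

```python
# Build the index once per section run:
groups_by_name = {group["name"]: group for group in self._groups}

# Each lookup is then a dict access instead of a linear generator scan:
group = groups_by_name.get(user_group)
if group is None:
    # same handling as the next(...)-based lookup: log and skip
    ...
```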
+ if user_group == "Business Administrators": + continue + # Try to find the group dictionary item in the payload + # for user group name: + group = next( + (item for item in self._groups if item["name"] == user_group), + None, + ) + if not group: + logger.error( + "Cannot find group with name -> '%s'. Cannot establish membership in Salesforce. Skipping to next group...", + user_group, + ) + success = False + continue + + group_id = group.get("salesforce_id") # Careful ID may not exist + group_name = group["name"] + if group_id is None: + logger.info( + "Group -> '%s' does not have a Salesforce ID. Cannot add user -> '%s' to this Salesforce group (group may not be enabled for Salesforce). Skipping...", + group_name, + user_name, + ) + # We don't treat this as an error - there may be payload groups which are not enabled for Salesforce! + continue + + existing_members = self._salesforce.get_group_members(group_id) + if not existing_members or not self._salesforce.exist_result_item( + existing_members, "UserOrGroupId", salesforce_user_id + ): + logger.info( + "Add Salesforce user -> '%s' (%s) to Salesforce group -> '%s' (%s)...", + user_name, + salesforce_user_id, + group_name, + group_id, + ) + response = self._salesforce.add_group_member( + group_id=group_id, member_id=salesforce_user_id + ) + member_id = self._salesforce.get_result_value(response, "id") + if not member_id: + logger.error( + "Failed to add Salesforce user -> '%s' (%s) as member to Salesforce group -> '%s' (%s)", + user_name, + salesforce_user_id, + group_name, + group_id, + ) + success = False + continue + else: + logger.info( + "Salesforce User -> '%s' (%s) does already exist in Salesforce group -> '%s' (%s). Skipping...", + user_name, + salesforce_user_id, + group_name, + group_id, + ) + # end for loop user groups + # end for loop users + + self.write_status_file(success, section_name, self._users) + + return success + + # end method definition + + def process_users_core_share( + self, + section_name: str = "usersCoreShare", + ) -> bool: + """Process users in payload and sync them with Core Share (passwords and email). + + Args: + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type are used (e.g. the "Post" sections like "webHooksPost") This name is also used for the "success" status files written to the Admin Personal Workspace - tab_name (str, optional): Name of the Extended ECM tab. Default is "Extended ECM". Returns: bool: True if payload has been processed without errors, False otherwise + Side Effects: + the user items are modified by adding "core_share_user_id" + dict element that includes the ID of the user in Core Share. This will be written + into the status file. """ - if not isinstance(self._m365, M365): - logger.error( - "Office 365 connection not setup properly. Skipping payload section -> %s...", - section_name, - ) - return False - - if not self._groups: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + if not self._users: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True + if not self._core_share: + logger.error("Core Share connection is not initialized. 
Bailing out...") + return False + # If this payload section has been processed successfully before we # can return True and skip processing it once more: if self.check_status_file(section_name): @@ -3393,1600 +4558,1862 @@ def process_teams_m365_apps( success: bool = True - # Determine the ID of the Extended ECM App: - app_name = self._m365.config()["teamsAppName"] - response = self._m365.get_teams_apps(f"contains(displayName, '{app_name}')") - # Get the App catalog ID: - app_catalog_id = self._m365.get_result_value(response, "id", 0) - if not app_catalog_id: - logger.error("M365 App -> %s not found in catalog!", app_name) - return False - - for group in self._groups: - if not "name" in group: - logger.error("Team needs a name. Skipping...") + # traverse all users in payload: + for user in self._users: + # Sanity checks: + if not "lastname" in user or not "firstname" in user: + logger.error( + "User is missing last name or first name. Skipping to next user..." + ) success = False continue - group_name = group["name"] + user_last_name = user["lastname"] + user_first_name = user["firstname"] + user_name = user_first_name + " " + user_last_name # Check if element has been disabled in payload (enabled = false). # In this case we skip the element: - if "enabled" in group and not group["enabled"]: + if "enabled" in user and not user["enabled"]: logger.info( - "Payload for Group -> %s is disabled. Skipping...", group_name + "Payload for User -> '%s' is disabled. Skipping...", user_name ) continue - if not "enable_o365" in group or not group["enable_o365"]: + + # Check if the user is enabled for Core Share: + if not "enable_core_share" in user or not user["enable_core_share"]: logger.info( - "Office 365 is not enabled in payload for Group -> %s. Skipping...", - group_name, + "User -> '%s' is not enabled for Core Share. Skipping...", + user_name, ) continue + user_email = user.get("email", "") + user_password = user.get("password", "") + + # Initialize variables: + need_email_verification = False + # - # Now we create a tab in the "General" channel for the Extended ECM Teams App + # 1. Check if user does already exist in Core Share: # - # 1. Check if the tab is already assigned to the General channel. - # This determines if we need to create or update the tab / app: - app_name = self._m365.config()["teamsAppName"] - response = self._m365.get_team_channel_tabs( - team_name=group_name, channel_name="General" - ) - # Check if tab is already there: - if self._m365.exist_result_item(response, "displayName", tab_name): - logger.info( - "M365 Teams app -> %s is already configured for M365 Team -> %s. Updating it with new URLs and IDs...", - app_name, - tab_name, - ) - update = True # update existing tab - else: - logger.info( - "Add tab -> %s to channel -> General of M365 Team -> %s for app -> %s", - tab_name, - group_name, - app_name, - ) - update = False # create new tab + core_share_user_id = self.determine_user_id_core_share(user) - # 2. Determine the M365 Team ID. If the team is not found then skip: - response = self._m365.get_team(group_name) - team_id = self._m365.get_result_value(response, "id", 0) - if not team_id: - logger.error("M365 Team -> %s not found!", group_name) - success = False - continue + # + # 2: Create or Update user in Core Share: + # - # 3. 
Install the App for the particular M365 Team (if it is not yet installed): - response = self._m365.get_teams_apps_of_team( - team_id, - f"contains(teamsAppDefinition/displayName, '{app_name}')", - ) - if self._m365.exist_result_item( - response, "displayName", app_name, sub_dict_name="teamsAppDefinition" - ): + # Check if we need to create a new Core Share user: + if core_share_user_id is None: logger.info( - "App -> %s is already installed for M365 Team -> %s (%s). Trying to upgrade app...", - app_name, - group_name, - team_id, + "Core Share user -> '%s' does not exist. Creating a new Core Share user...", + user_name, ) - response = self._m365.upgrade_teams_app_of_team(team_id, app_name) - if not response: - logger.error( - "Failed to upgrade the existing app -> %s for the M365 Team -> %s", - app_name, - group_name, - ) - success = False - continue - else: - logger.info( - "Install M365 Teams app -> %s for M365 team -> %s", - app_name, - group_name, + response = self._core_share.add_user( + first_name=user_first_name, + last_name=user_last_name, + email=user_email, + title=user.get("title", None), + company=user.get("company", "Innovate"), + password=user.get("password", None), ) - response = self._m365.assign_teams_app_to_team( - team_id=team_id, app_id=app_catalog_id + core_share_user_id = self._core_share.get_result_value( + response=response, key="id" ) - if not response: + if not core_share_user_id: logger.error( - "Failed to install App -> %s (%s) for M365 Team -> %s", - app_name, - app_catalog_id, - group_name, + "Failed to create Core Share user -> '%s'. Skipping...", + user_name, ) success = False continue + else: + logger.info( + "Successfully created Core Share user -> '%s' with ID -> %s", + user_name, + core_share_user_id, + ) - # 4. Create a Tab in the "General" channel of the M365 Team: - if group_name == "Innovate": - # Use the Enterprise Workspace for the - # top-level group "Innovate": - node_id = 2000 - else: - # We assume the departmental group names are identical to - # top-level folders in the Enterprise volume - node = self._otcs.get_node_by_parent_and_name(2000, group_name) - node_id = self._otcs.get_result_value(node, "id") - if not node_id: - logger.warning( - "Cannot find a top-level container for group -> %s. Cannot configure M365 Teams app. Skipping...", - group_name, + # Check if the user is still in pending state: + is_confirmed = self._core_share.get_result_value( + response=response, key="isConfirmed" + ) + # we need to differentiate False an None here - don't simplify to "if not is_confirmed"! 
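+ # Tri-state: is_confirmed can be True (confirmed), False (user created + # but invitation still pending), or None (no "isConfirmed" value in the + # response) - hence the explicit "is False" / "is True" checks below + # instead of plain truthiness.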
+ if is_confirmed is False: + logger.info( + "New Core Share user -> '%s' is not yet confirmed and in a 'pending' state!", + user_name, + ) + elif is_confirmed is True: + logger.info( + "New Core Share user -> '%s' is already confirmed!", + user_name, ) - continue - app_url = ( - self._otcs_frontend.cs_support_public_url() - ) # it is important to use the frontend pod URL here - app_url += "/xecmoffice/teamsapp.html?nodeId=" - app_url += ( - str(node_id) + "&type=container&parentId=2000&target=content&csurl=" - ) - app_url += self._otcs_frontend.cs_public_url() - app_url += "&appId=" + app_catalog_id + # We write the user password in addition to the "Other" Address field + # to determine in a subsequent deployment the "old" password: + update_data = { + "addresses": [ + {"type": "other", "value": user.get("password", None)} + ] + } + response = self._core_share.update_user( + user_id=core_share_user_id, update_data=update_data + ) - if update: - # App / Tab exist but needs to be updated with new - # IDs for the new deployment of Extended ECM - # as the M365 Teams survive between Terrarium deployments: + # We need email verification for new users: + need_email_verification = True + url_search_pattern = "verify-email" + subject = "Welcome to OpenText Core Share" + # For new users the old password is equal to the new password: + old_password = user.get("password", None) + # The user does already exist in Core Share: + else: + update_data = { + "firstName": user.get("firstname", ""), + "lastName": user.get("lastname", ""), + "title": user.get("title", ""), + "company": user.get("company", ""), + } logger.info( - "Updating tab -> %s of M365 Team channel -> General for app -> %s (%s) with new URLs and node IDs", - tab_name, - app_name, - app_catalog_id, + "Core Share user -> '%s' does already exist. Updating Core Share user with -> %s...", + user_name, + str(update_data), ) - response = self._m365.update_teams_app_of_channel( - team_name=group_name, - channel_name="General", - tab_name=tab_name, - app_url=app_url, - cs_node_id=node_id, + # Fetch the existing user: + core_share_user = self._core_share.get_user_by_id( + user_id=core_share_user_id ) - else: - # Tab does not exist in "General" channel so we - # add / configure it freshly: - logger.info( - "Adding tab -> %s to M365 Team channel -> General for app -> %s (%s)", - tab_name, - app_name, - app_catalog_id, + # Check if the user is still in pending state: + is_confirmed = self._core_share.get_result_value( + response=core_share_user, key="isConfirmed" ) + # we need to differentiate False and None here - don't simplify to "if not is_confirmed"! + if is_confirmed is False: + logger.warning( + "Core Share user -> '%s' has not yet confirmed the email invitation and is in 'pending' state! Resend invite...", + user_name, + ) + # We try the email verification once more... + self._core_share.resend_user_invite(core_share_user_id) + need_email_verification = True + url_search_pattern = "confirm-account" + subject = "Invitation to OpenText Core Share" + + # Check if we have the old password of the user in the "Other" address field: + core_share_user_addresses = self._core_share.get_result_value( + core_share_user, "addresses" + ) + if core_share_user_addresses and len(core_share_user_addresses) > 0: + old_password = core_share_user_addresses[0]["value"] + logger.info( + "Found old password for Core Share user -> '%s' (%s)", + user_name, + core_share_user_id, + ) + else: + logger.info( + "No old password found for Core Share user -> '%s'. 
Cannot set a new password.", + user_name, + ) + old_password = "" - response = self._m365.add_teams_app_to_channel( - team_name=group_name, - channel_name="General", - app_id=app_catalog_id, - tab_name=tab_name, - app_url=app_url, - cs_node_id=node_id, + # We store the current password into the address field (this adds to the update dictionary + # defined above and used below): THIS IS CURRENTLY NOT WORKING! + update_data["addresses"] = [{"type": "other", "value": user_password}] + + # Check if the mail address has really changed. Otherwise we + # don't need to set it again and can avoid email verification: + core_share_user_email = self._core_share.get_result_value( + core_share_user, "email" + ) + if user_email != core_share_user_email: + logger.info( + "Email for Core Share user -> '%s' has changed from -> '%s' to -> '%s'. We need to verify this via email.", + user_name, + core_share_user_email, + user_email, + ) + # Additional email payload for user update: + update_data["email"] = user_email + # If email is changed this needs to be confirmed by passing + # the current (old) password: + update_data["password"] = ( + old_password if old_password else user_password + ) + # As email has changed - we need the email verification below... + need_email_verification = True + url_search_pattern = "verify-email" + subject = "OpenText Core Share: Email Updated" + + # Update the existing Core Share user with new / changed data: + response = self._core_share.update_user( + user_id=core_share_user_id, + update_data=update_data, ) if not response: logger.error( - "Failed to add tab -> %s to M365 Team channel -> General for app -> %s (%s)", - tab_name, - app_name, - app_catalog_id, + "Failed to update Core Share user -> '%s'. Skipping...", + user_name, + ) + success = False + continue + else: + logger.info( + "Successfully updated Core Share user -> '%s'.", user_name ) - self.write_status_file(success, section_name, self._groups) - - return success - - # end method definition - - def cleanup_stale_teams_m365(self, workspace_types: list) -> bool: - """Delete Microsoft Teams that are left-overs from former deployments. - This method is currently not used. - - Args: - workspace_types (list): list of all workspace types - Returns: - bool: True if successful, False otherwise - """ - - if not isinstance(self._m365, M365): - logger.error( - "Office 365 connection not setup properly. Skipping cleanup...", - ) - return False - - if workspace_types == []: - logger.error("Empty workspace type list!") - return False + # Now update the password: + if user_password and old_password and user_password != old_password: + response = self._core_share.update_user_password( + user_id=core_share_user_id, + password=old_password, + new_password=user_password, + ) + if response: + logger.info( + "Successfully updated password of Core Share user -> '%s' (%s).", + user_name, + core_share_user_id, + ) + else: + logger.error( + "Failed to update Core Share password for user -> '%s' (%s). Skipping...", + user_name, + core_share_user_id, + ) + success = False + continue + else: + if not old_password: + logger.warning( + "Cannot change Core Share user password for -> '%s' (%s). Need both, old and new passwords.", + user_name, + core_share_user_id, + ) + else: + logger.info( + "Core Share user password for -> '%s' (%s) is unchanged.", + user_name, + core_share_user_id, + ) - for workspace_type in workspace_types: - if not "name" in workspace_type: - logger.error( - "Workspace type -> %s does not have a name. 
Skipping...", - workspace_type, - ) - continue - response = self._otcs.get_workspace_instances(workspace_type["name"]) - workspace_instances = response["results"] - if not workspace_instances: + # For existing users we want to cleanup possible left-overs form old deployments logger.info( - "Workspace type -> %s does not have any instances!", - workspace_type["name"], + "Cleanup existing file shares of Core Share user -> '%s' (%s)...", + user_name, + core_share_user_id, ) - continue - for workspace_instance in workspace_instances: - workspace_name = workspace_instance["data"]["properties"]["name"] - logger.info( - "Check if stale Microsoft 365 Teams with name -> %s exist...", - workspace_name, + response = self._core_share.cleanup_user_files( + user_id=core_share_user_id, + user_login=core_share_user_email, + user_password=user_password, ) - response = self._m365.delete_teams(workspace_name) + if not response: + logger.error("Failed to cleanup user files!") - return True + # Save result for status file content + user["core_share_user_id"] = core_share_user_id - # end method definition + # + # 3: Handle Email verification: + # - def cleanup_all_teams_m365(self, section_name: str = "teamsM365Cleanup") -> bool: - """Delete Microsoft Teams that are left-overs from former deployments + # We now need to wait for the verification mail from Core Share, + # get it from the M365 Outlook inbox of the user (or the admin + # if the user does not have its own inbox) and click the + # verification link... - Args: - section_name (str, optional): name of the section. - This name is used for the "success" status - files written to the Admin Personal Workspace - Returns: - bool: True if teams have been deleted, False otherwise - """ + if need_email_verification: + logger.info( + "Processing Email verification for user -> '%s' (%s). Wait a few seconds to make sure verification mail in user's inbox...", + user_name, + user_email, + ) + time.sleep(20) + + # Process verification mail sent by Core Share. + # This has some hard-coded value. We may want to optimize it further in the future: + result = self._m365.email_verification( + user_email=user_email, + sender="noreply@opentext.cloud", + subject=subject, + url_search_pattern=url_search_pattern, + line_end_marker="=", + multi_line=True, + multi_line_end_marker="%3D", + replacements=None, + max_retries=6, + use_browser_automation=True, + password=user_password, + password_field_id="passwordInput", + password_confirmation_field_id="confirmResetPassword", + password_submit_xpath="//button[@type='submit']", + terms_of_service_xpath="//div[@id = 'termsOfService']//button[@type='submit']", + ) + if not result: + # Email verification was not successful + logger.warning( + "Core Share email verification failed. No verification mail received in user's inbox." + ) + # don't treat as error nor do "continue" here - we still want to process the user groups... + else: + logger.info( + "Successfully verified new email address -> %s", user_email + ) + # end if need_email_verification - if not isinstance(self._m365, M365): - logger.error( - "Office 365 connection not setup properly. 
Skipping payload section -> %s...", - section_name, - ) - return False + # + # 4: Add users into groups in Core Share: + # - # We want this cleanup to only run once even if we have - # multiple payload files - so we pass payload_specific=False here: - if self.check_status_file( - payload_section_name=section_name, payload_specific=False - ): logger.info( - "Payload section -> %s has been processed successfully before. Skip cleanup of M365 teams...", - section_name, + "Processing group memberships of Core Share user -> '%s' (%s)...", + user_name, + user_email, ) - return True - - logger.info("Processing payload section -> %s...", section_name) + user_groups = user.get("groups", []) + base_group = user.get("base_group", None) + if base_group and not base_group in user_groups: + user_groups.append(base_group) # list of groups the user is in + + for user_group in user_groups: + # "Business Administrators" is a OTCS generated group that we won't find + # in payload - skip this group. + if user_group == "Business Administrators": + # Users that are Business Administrators in Extended ECM + # become Content Manager (role = 5) in Core Share: + logger.info( + "User -> '%s' is a business administrator in Extended ECM and becomes a 'Content Manager' (access role 5) in Core Share", + user_name, + ) + self._core_share.add_user_access_role( + user_id=core_share_user_id, role_id=5 + ) + continue + # Try to find the group dictionary item in the payload + # for user group name: + group = next( + (item for item in self._groups if item["name"] == user_group), + None, + ) + if not group: + logger.error( + "Cannot find group with name -> '%s'. Cannot establish membership in Core Share. Skipping to next group...", + user_group, + ) + success = False + continue - # We don't want to delete MS Teams that are matching the regular OTCS Group Names (like "Sales") - exception_list = self.get_all_group_names() + group_name = group["name"] + group_id = self.determine_group_id_core_share( + group + ) # Careful ID may not exist + if group_id is None: + logger.info( + "Group -> '%s' does not have a Core Share ID. Cannot add user -> '%s' to this Core Share group (group may not be enabled for Core Share). Skipping...", + group_name, + user_name, + ) + # We don't treat this as an error - there may be payload groups which are not enabled for Core Share! 
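+ # (Illustrative payload fragment for such a group - the group-level "enable_core_share" switch shown here is an assumption mirroring the user-level flag checked at the top of this method: + #   {"name": "Sales", "enable_core_share": False} + # for a group like this, determine_group_id_core_share() is expected to return None.)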
+ continue - # These are the patterns that each MS Teams needs to match at least one of to be deleted - # Pattern 1: all MS teams with a name that has a number in brackets, line "(1234)" - # Pattern 2: all MS Teams with a name that starts with a number followed by a space, - # followed by a "-" and followed by another space - # Pattern 3: all MS Teams with a name that starts with "WS" and a 1-4 digit number - # (these are the workspaces for Purchase Contracts generated for Intelligent Filing) - # Pattern 4: all MS Teams with a name that ends with a 1-2 character + a number in brackets, like (US-1000) - # this is a specialization of pattern 1 - # Pattern 5: remove the teams that are created for the dummy copy&paste template for the - # Intelligent Filing workspaces - pattern_list = [ - r"\(\d+\)", - r"\d+\s-\s", - r"^WS\d{1,4}$", - r"^.+?\s\(.{1,2}-\d+\)$", - r"Purchase\sContract\s\(Template\sfor\sIntelligent\sFiling\)", - ] + existing_members = self._core_share.get_group_members(group_id) - result = self._m365.delete_all_teams(exception_list, pattern_list) + # Only add user as new member if not yet a member or a 'pending' member: + is_member = self._core_share.exist_result_item( + response=existing_members, + key="id", + value=core_share_user_id, + results_marker="groupMembers", + ) + is_pending_member = self._core_share.exist_result_item( + response=existing_members, + key="email", + value=user_email, + results_marker="pending", + ) - # We want this cleanup to only run once even if we have - # multiple payload files - so we pass payload_specific=False here: - self.write_status_file( - success=True, - payload_section_name=section_name, - payload_section=exception_list + pattern_list, - payload_specific=False, - ) + if not is_member and not is_pending_member: + logger.info( + "Add Core Share user -> '%s' (%s) to Core Share group -> '%s' (%s)...", + user_name, + core_share_user_id, + group_name, + group_id, + ) + # We make users that have this group as base_group + # to Admins of the Core Share group: + is_group_admin = user_group == base_group + response = self._core_share.add_group_member( + group_id=group_id, + user_id=core_share_user_id, + is_group_admin=is_group_admin, + ) + # the add_group_member() has a special return value + # which is a list (not a dict). It has mostly 1 element + # which is a dict with a "success" item. This (and not response.ok) + # determines if the call was successful! + success: bool = self._core_share.get_result_value( + response, "success" + ) + if not success: + errors = self._core_share.get_result_value(response, "errors") + logger.error( + "Failed to add Core Share user -> '%s' (%s) as member to Core Share group -> '%s' (%s). Error -> %s", + user_name, + core_share_user_id, + group_name, + group_id, + str(errors), + ) + success = False + continue + else: + logger.info( + "Core Share User -> '%s' (%s) is already a %s of Core Share group -> '%s' (%s). Skipping...", + user_name, + core_share_user_id, + "member" if is_member else "pending member", + group_name, + group_id, + ) + # end for loop user groups + # end for loop users - return result + self.write_status_file(success, section_name, self._users) - # end method definition + return success - def process_admin_settings( - self, admin_settings: list, section_name: str = "adminSettings" - ) -> bool: - """Process admin settings in payload and import them to Extended ECM. 
+ # end method definition - The payload section is a list of dicts with these items: - { - enabled: True or False to enable or disable the payload item - filename: The filename of the XML file with admin settings. - It needs to be the plain filename like "admin.xml". - The files reside inside the container in /settings root - directory. They are placed there by the Terraform automation - and are taken from the ./settings/payload directory. - description: Some description about the purpose of the settings. - Just for information and optional. - } + def process_users_m365(self, section_name: str = "usersM365") -> bool: + """Process users in payload and create them in Microsoft 365 via MS Graph API. Args: - admin_settings (list): list of admin settings. We need this parameter - as we process two different lists. section_name (str, optional): name of the section. It can be overridden for cases where multiple sections of same type - are used (e.g. the "Post" sections like "adminSettingsPost") + are used (e.g. the "Post" sections like "webHooksPost") This name is also used for the "success" status files written to the Admin Personal Workspace Returns: - bool: True if a restart of the OTCS pods is required. False otherwise. + bool: True if payload has been processed without errors, False otherwise """ - if not admin_settings: - logger.info("Payload section -> %s is empty. Skipping...", section_name) - return False # important to return False here as otherwise we are triggering a restart of services!! + if not isinstance(self._m365, M365): + logger.error( + "Microsoft 365 connection not setup properly. Skipping payload section -> '%s'...", + section_name, + ) + return False + + if not self._users: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True # If this payload section has been processed successfully before we - # can return False and skip processing it once more: + # can return True and skip processing it once more: if self.check_status_file(section_name): - return False # important to return False here as otherwise we are triggering a restart of services!! + return True - restart_required: bool = False success: bool = True - for admin_setting in admin_settings: + # Add all users in payload and establish membership in + # specified groups: + for user in self._users: # Sanity checks: - if not "filename" in admin_setting: - logger.error("Filename is missing. Skipping to next admin setting...") + if not "name" in user: + logger.error("User is missing a login. Skipping to next user...") + success = False continue - filename = admin_setting["filename"] + user_name = user["name"] # Check if element has been disabled in payload (enabled = false). # In this case we skip the element: - if "enabled" in admin_setting and not admin_setting["enabled"]: + if "enabled" in user and not user["enabled"]: logger.info( - "Payload for setting file -> %s is disabled. Skipping...", filename + "Payload for User -> '%s' is disabled. Skipping...", user_name + ) + continue + if not "enable_o365" in user or not user["enable_o365"]: + logger.info( + "Microsoft 365 is not enabled in payload for User -> '%s'. 
Skipping...", + user_name, ) continue - settings_file = self._custom_settings_dir + filename - if os.path.exists(settings_file): - description = admin_setting.get("description") - if description: - logger.info(description) - - # Read the config file: - with open(settings_file, "r", encoding="utf-8") as file: - file_content = file.read() + # Sanity checks: + if not "password" in user: + logger.error( + "User -> '%s' is missing a password. Skipping to next user...", + user_name, + ) + success = False + continue + user_password = user["password"] + # be careful with the following fields - they could be empty + user_department = user.get("base_group", "") + user_first_name = user.get("firstname", "") + user_last_name = user.get("lastname", "") + user_location = user.get("location", "US") + user_email = user.get("email", user_name) - logger.debug( - "Replace Placeholder -> %s in file -> %s", - self._placeholder_values, - file_content, + # Check if the user does already exist in M365 (e.g. if job is restarted) + m365_user_id = self.determine_user_id_m365(user) + if not m365_user_id: + logger.info( + "Did not find existing Microsoft 365 user - creating user %s...", + user_email, ) - file_content = self.replace_placeholders(file_content) + # Now we know it is a new user... + # We are not 1:1 using the email address from the + # payload as this could by an alias address using the "+" syntax: + m365_user_name = user_name + "@" + self._m365.config()["domain"] - # Write the updated config file: - tmpfile = "/tmp/" + os.path.basename(settings_file) - with open(tmpfile, "w", encoding="utf-8") as file: - file.write(file_content) + new_user = self._m365.add_user( + email=m365_user_name, + password=user_password, + first_name=user_first_name, + last_name=user_last_name, + location=user_location, + department=user_department, + ) + if new_user is not None: + # Store the Microsoft 365 user ID in payload: + user["m365_id"] = new_user["id"] + m365_user_id = new_user["id"] + logger.info( + "New Microsoft 365 user -> '%s' with ID -> %s has been created", + user_name, + m365_user_id, + ) + else: + logger.error( + "Failed to create new Microsoft 365 user -> '%s'. Skipping...", + user_name, + ) + success = False + continue + else: + # if the user exists we just set the password according + # the the payload definition to allow to bulk + # update existing M365 users with new passwords: + logger.info( + "Found existing Microsoft 365 user -> '%s' - updating password...", + user_name, + ) + new_password_settings = { + "passwordProfile": { + "forceChangePasswordNextSignIn": False, + "password": user_password, + } + } + # response = self._m365.update_user(user_email, new_password_settings) + response = self._m365.update_user(m365_user_id, new_password_settings) + if not response: + logger.error( + "Failed to update password of M365 user -> '%s' (%s)", + user_name, + m365_user_id, + ) - response = self._otcs.apply_config(tmpfile) - if response and response["results"]["data"]["restart"]: - logger.info("A restart of Extended ECM service is required.") - restart_required = True + # Now we assign a license to the new M365 user. 
+ # First we see if there's a M365 SKU list in user + # payload - if not we wrap the default SKU configured + # for the m365 object into a single item list: + existing_user_licenses = self._m365.get_user_licenses(m365_user_id) + sku_list = user.get("m365_skus", [self._m365.config()["skuId"]]) + for sku_id in sku_list: + # Check if the M365 user already has this license: + if not self._m365.exist_result_item( + existing_user_licenses, "skuId", sku_id + ): + response = self._m365.assign_license_to_user(m365_user_id, sku_id) + if not response: + logger.error( + "Failed to assign license -> '%s' to Microsoft 365 user -> '%s'", + sku_id, + user_name, + ) + success = False + else: + if ( + not "m365_skus" in user + ): # this is only True if the default license from the m365 object is taken + user["m365_skus"] = [sku_id] + logger.info( + "License -> '%s' has been assigned to Microsoft 365 user -> %s", + sku_id, + user_name, + ) + else: + logger.info( + "Microsoft 365 user -> '%s' already has the license -> %s", + user_name, + sku_id, + ) + + # Now we assign the Extended ECM Teams App to the new M365 user. + # First we check if the app is already assigned to the user. + # If not we install / assign the app. If the user already has + # the Extended ECM app we try to upgrade it: + app_name = self._m365.config()["teamsAppName"] + app_external_id = self._m365.config()["teamsAppExternalId"] + app_internal_id = self._m365.config().get("teamsAppInternalId", None) + response = self._m365.get_teams_apps_of_user( + user_id=m365_user_id, + filter_expression="contains(teamsAppDefinition/displayName, '{}')".format( + app_name + ), + ) + if self._m365.exist_result_item( + response=response, + key="displayName", + value=app_name, + sub_dict_name="teamsAppDefinition", + ): + logger.info( + "M365 Teams App -> '%s' is already assigned to M365 user -> '%s' (%s). Trying to upgrade app...", + app_name, + user_name, + m365_user_id, + ) + response = self._m365.upgrade_teams_app_of_user( + user_id=m365_user_id, app_name=app_name + ) else: - logger.error("Admin settings file -> %s not found.", settings_file) - success = False + logger.info( + "Assign M365 Teams app -> '%s' (%s) to M365 user -> '%s' (%s)", + app_name, + app_external_id, + user_name, + m365_user_id, + ) + # This can produce errors because the app may be assigned organization-wide. + # So we don't treat it as an error and just show a warning. + # We also try to use the internal app id instead of the name: + if app_internal_id: + response = self._m365.assign_teams_app_to_user( + user_id=m365_user_id, + app_name=app_name, + app_internal_id=app_internal_id, + show_error=False, + ) + else: + response = self._m365.assign_teams_app_to_user( + user_id=m365_user_id, app_name=app_name, show_error=False + ) + # if not response: + # logger.warning( + # "Failed to assign M365 Teams App -> '%s' (%s) to M365 user -> '%s' (%s)", + # app_name, + # app_id, + # user_name, + # m365_user_id, + # ) + # success = False + # continue - self.write_status_file(success, section_name, admin_settings) + # Process Microsoft 365 group memberships of new user: + # don't forget the base group (department) if it is not yet in groups! + group_names = user.get("groups", []) + if user_department and not user_department in group_names: + group_names.append(user_department) + logger.info( + "User -> '%s' has these groups in payload -> %s (including base group -> %s). 
Checking if they are Microsoft 365 Groups...", + user_name, + group_names, + user_department, + ) + # Go through all group names: + for group_name in group_names: + # Find the group payload item to the parent group name: + group = next( + (item for item in self._groups if item["name"] == group_name), + None, + ) + if not group: + # if group is not in payload then this membership + # is not relevant for Microsoft 365. This could be system generated + # groups like "PageEdit" or "Business Administrators". + # In this case we do "continue" as we can't process parent groups + # either: + logger.debug( + "No payload found for Group -> '%s'. Skipping...", group_name + ) + continue + elif not "enable_o365" in group or not group["enable_o365"]: + # If Microsoft 365 is not enabled for this group in + # the payload we don't create a M365 but we do NOT continue + # as there may still be parent groups that are M365 enabled + # we want to put the user in (see below): + logger.info( + "Payload Group -> '%s' is not enabled for M365.", group_name + ) + else: + response = self._m365.get_group(group_name) + if ( + response is None + or not "value" in response + or not response["value"] + ): + logger.error( + "Microsoft 365 Group -> '%s' not found. Skipping...", + group_name, + ) + success = False + else: + group_id = response["value"][0]["id"] - return restart_required + # Check if user is already a member. We don't want + # to throw an error if the user is not found as a member + # so we pass show_error=False: + if self._m365.is_member( + group_id, m365_user_id, show_error=False + ): + logger.info( + "Microsoft 365 user -> '%s' (%s) is already in Microsoft 365 group -> '%s' (%s)", + user["name"], + m365_user_id, + group_name, + group_id, + ) + else: + logger.info( + "Add Microsoft 365 user -> '%s' (%s) to Microsoft 365 group -> '%s' (%s)", + user["name"], + m365_user_id, + group_name, + group_id, + ) + response = self._m365.add_group_member( + group_id, m365_user_id + ) + if not response: + logger.error( + "Failed to add Microsoft 365 user -> '%s' (%s) to Microsoft 365 group -> '%s' (%s)", + user["name"], + m365_user_id, + group_name, + group_id, + ) + success = False - # end method definition + # As each group should have at least one owner in M365 + # we set all users also as owners for now. 
Later we + # may want to configure this via payload: + logger.info( + "Make Microsoft 365 user -> '%s' (%s) owner of Microsoft 365 group -> '%s' (%s)", + user["name"], + m365_user_id, + group_name, + group_id, + ) + response = self._m365.add_group_owner( + group_id, m365_user_id + ) + if not response: + logger.error( + "Failed to make Microsoft 365 user -> '%s' (%s) owner of Microsoft 365 group -> '%s' (%s)", + user["name"], + m365_user_id, + group_name, + group_id, + ) + success = False - def check_external_system(self, external_system: dict) -> bool: - """Check if external system is reachable + # As M365 groups are flat (not nested) we also add the + # user as member to the parent groups of the current group + # if the parent group is enabled for M365: + parent_group_names = group.get("parent_groups", []) + logger.info( + "Group -> '%s' has the following parent groups -> %s", + group_name, + parent_group_names, + ) + for parent_group_name in parent_group_names: + # Find the group dictionary item for the parent group name: + parent_group = next( + ( + item + for item in self._groups + if item["name"] == parent_group_name + ), + None, + ) + if ( + parent_group is None + or not "enable_o365" in parent_group + or not parent_group["enable_o365"] + ): + # if parent group is not in payload then this membership + # is not relevant for Microsoft 365. + # If Microsoft 365 is not enabled for this parent group in + # the payload we can also skip: + logger.info( + "Parent Group -> '%s' is not enabled for M365. Skipping...", + parent_group_name, + ) + continue - Args: - external_system (dict): payload data structure of external system. - We assume here that sanity check for - valid data is already done before. + response = self._m365.get_group(parent_group_name) + if ( + response is None + or not "value" in response + or not response["value"] + ): + logger.error( + "Microsoft 365 Group -> '%s' not found. Skipping...", + parent_group_name, + ) + success = False + continue + parent_group_id = response["value"][0]["id"] + + # Check if user is already a member. We don't want + # to throw an error if the user is not found as a member: + if self._m365.is_member( + parent_group_id, m365_user_id, show_error=False + ): + logger.info( + "Microsoft 365 user -> '%s' (%s) is already in Microsoft 365 group -> '%s' (%s)", + user["name"], + m365_user_id, + parent_group_name, + parent_group_id, + ) + continue + + logger.info( + "Add Microsoft 365 user -> '%s' (%s) to Microsoft 365 group -> '%s' (%s)", + user["name"], + m365_user_id, + parent_group_name, + parent_group_id, + ) + self._m365.add_group_member(parent_group_id, m365_user_id) + # end for parent_group_name + # end for group name + # end for user + self.write_status_file(success, section_name, self._users) + + return success + # end method definition + + def process_teams_m365(self, section_name: str = "teamsM365") -> bool: + """Process groups in payload and create matching Teams in Microsoft 365. + We need to do this after the creation of the M365 users as we require + Group Owners to create teams. These are NOT the teams for Extended ECM + workspaces! Those are created by Scheduled Bots (Jobs) from Extended ECM! + + Args: + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. 
the "Post" sections like "webHooksPost") + This name is also used for the "success" status + files written to the Admin Personal Workspace Returns: - bool: True = system is reachable, False otherwise + bool: True if payload has been processed without errors, False otherwise """ - as_url = external_system["as_url"] + if not isinstance(self._m365, M365): + logger.error( + "Microsoft 365 connection not setup properly. Skipping payload section -> '%s'...", + section_name, + ) + return False - # Extract the hostname: - external_system_hostname = urlparse(as_url).hostname - # Write this information back into the data structure: - external_system["external_system_hostname"] = external_system_hostname - # Extract the port: - external_system_port = urlparse(as_url).port if urlparse(as_url).port else 80 - # Write this information back into the data structure: - external_system["external_system_port"] = external_system_port + if not self._groups: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True - if self._http_object.check_host_reachable( - external_system_hostname, external_system_port - ): - logger.info( - "Mark external system -> %s as reachable for later workspace creation and SAP RFC processing...", - external_system["external_system_name"], - ) - external_system["reachable"] = True + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): return True - else: - external_system["reachable"] = False - return False - def process_external_systems(self, section_name: str = "externalSystems") -> bool: - """Process external systems in payload and create them in Extended ECM. + success: bool = True - The payload section is a list of dicts (each representing one external - system) with these items: - { - enabled: True or False to enable or disable the payload item - external_system_name: Name of the external systen. - external_system_type: Type of the external system. - Possible values are - * SAP - * SuccessFactors - * Salesforce - * AppWorks Platform - base_url: Base URL of the external system - as_url: Application Server URL of the external system - username: (Technical) User Name for the connection - password: Passord of the (technical) user - oauth_client_id: OAuth client ID - oauth_client_secret: OAuth client secret - archive_logical_name: Logical name of Archive for SAP - archive_certificate_file: Path and filename to certificate file. - This file is inside the customizer - pof file system. - } - If OAuth Client ID and Client Secret are provided then username - and password are no longer used. + for group in self._groups: + if not "name" in group: + logger.error("Team needs a name. Skipping...") + success = False + continue + group_name = group["name"] - In the payload for SAP external systems there are additional - items "client", "destination" that are processed by init_sap() + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in group and not group["enabled"]: + logger.info( + "Payload for Group -> '%s' is disabled. Skipping...", group_name + ) + continue + if not "enable_o365" in group or not group["enable_o365"]: + logger.info( + "Microsoft 365 is not enabled in payload for Group -> '%s'. 
Skipping...", + group_name, + ) + continue + + # Check if the M365 group does not exist (this should actually never happen at this point) + # Check if the user does already exist in M365 (e.g. if job is restarted) + m365_group_id = self.determine_group_id_m365(group) + if not m365_group_id: + # The "m365_id" value is set by the method process_groups_m365() + logger.error( + "No M365 Group exist for group -> '%s' (M365 Group creation may have failed). Skipping...", + group_name, + ) + success = False + continue + + if self._m365.has_team(group_name): + logger.info( + "M365 group -> '%s' already has an MS Team connected. Skipping...", + group_name, + ) + continue + + logger.info( + "Create M365 Team -> '%s' for existing M365 Group -> '%s'...", + group_name, + group_name, + ) + # Now "upgrading" this group to a MS Team: + new_team = self._m365.add_team(group_name) + if not new_team: + success = False + continue + + self.write_status_file(success, section_name, self._groups) + + return success + + # end method definition + + def process_teams_m365_apps( + self, section_name: str = "teamsM365Apps", tab_name: str = "Extended ECM" + ) -> bool: + """Process groups in payload and configure Extended ECM Teams Apps + as a tab called "Extended ECM" in the "General" channel of the M365 Team. + We need to do this after the transports as we need top level folders + we can point the Extended ECM teams app to. Args: - section_name (str, optional): name of the section. - This name is used for the "success" status + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections like "webHooksPost") + This name is also used for the "success" status files written to the Admin Personal Workspace + tab_name (str, optional): Name of the Extended ECM tab. Default is "Extended ECM". Returns: bool: True if payload has been processed without errors, False otherwise - Side Effects: - - based on system_type different other settings in the dict are set - - reachability is tested and a flag is set in the payload dict """ - if not self._external_systems: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + if not isinstance(self._m365, M365): + logger.error( + "Microsoft 365 connection not setup properly. Skipping payload section -> '%s'...", + section_name, + ) + return False + + if not self._groups: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True - # WE DON'T WANT TO DO THIS AS WE NEED TO INITIALIZE - # DATASTRUCTURES LIKE self._sap and self._salesforce!! # If this payload section has been processed successfully before we # can return True and skip processing it once more: - # if self.check_status_file(section_name): - # return True + if self.check_status_file(section_name): + return True success: bool = True - for external_system in self._external_systems: - # - # 1: Do sanity checks for the payload: - # - if not "external_system_name" in external_system: - logger.error( - "External System connection needs a logical system name! Skipping to next external system..." 
- ) + # Determine the ID of the Extended ECM App: + app_name = self._m365.config()["teamsAppName"] + app_internal_id = self._m365.config()["teamsAppInternalId"] + if not app_internal_id: + response = self._m365.get_teams_apps( + filter_expression="contains(displayName, '{}')".format(app_name) + ) + # Get the App catalog ID: + app_internal_id = self._m365.get_result_value(response, "id", 0) + if not app_internal_id: + logger.error("M365 Teams App -> '%s' not found in catalog!", app_name) + return False + + for group in self._groups: + if not "name" in group: + logger.error("Team needs a name. Skipping...") success = False continue - system_name = external_system["external_system_name"] + group_name = group["name"] - if not "external_system_type" in external_system: - logger.error( - "External System connection -> %s needs a type (SAP, Salesfoce, SuccessFactors, AppWorks Platform)! Skipping to next external system...", - system_name, + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in group and not group["enabled"]: + logger.info( + "Payload for Group -> '%s' is disabled. Skipping...", group_name ) - success = False continue - system_type = external_system["external_system_type"] - - if "enabled" in external_system and not external_system["enabled"]: + if not "enable_o365" in group or not group["enable_o365"]: logger.info( - "Payload for External System -> %s (%s) is disabled. Skipping...", - system_name, - system_type, + "Microsoft 365 is not enabled in payload for Group -> '%s'. Skipping...", + group_name, ) continue - # Possible Connection Types for external systems: - # "Business Scenario Sample" (Business Scenarios Sample Adapter) - # "ot.sap.c4c.SpiAdapter" (SAP C4C SPI Adapter) - # "ot.sap.c4c.SpiAdapterV2" (C4C SPI Adapter V2) - # "HTTP" (Default WebService Adapter) - # "ot.sap.S4HANAAdapter" (S/4HANA SPI Adapter) - # "SF" (SalesForce Adapter) - # "SFInstance" (SFWebService) - - # Set the default settings for the different system types: - match system_type: - # Check if we have a SuccessFactors system: - case "SuccessFactors": - connection_type = "SFInstance" - auth_method = "OAUTH" - username = None - password = None - case "SAP": - connection_type = "HTTP" - auth_method = "BASIC" - oauth_client_id = None - oauth_client_secret = None - case "Salesforce": - connection_type = "SF" - auth_method = "OAUTH" - username = None - password = None - case "AppWorks Platform": - connection_type = "HTTP" - auth_method = "BASIC" - oauth_client_id = None - oauth_client_secret = None - case "Business Scenario Sample": - connection_type = "Business Scenario Sample" - auth_method = "BASIC" - oauth_client_id = None - oauth_client_secret = None - case _: - logger.error("Unsupported system_type defined -> %s", system_type) - return False + # + # Now we create a tab in the "General" channel for the Extended ECM Teams App + # - if not "base_url" in external_system: - base_url = "" # baseUrl is optional + # 1. Check if the tab is already assigned to the General channel. + # This determines if we need to create or update the tab / app: + app_name = self._m365.config()["teamsAppName"] + response = self._m365.get_team_channel_tabs( + team_name=group_name, channel_name="General" + ) + # Check if tab is already there: + if self._m365.exist_result_item( + response=response, key="displayName", value=tab_name + ): + logger.info( + "M365 Teams app -> '%s' is already configured for M365 Team -> '%s' (tab -> '%s' does already exist). 
Updating it with new URLs and IDs...", + app_name, + group_name, + tab_name, + ) + update = True # update existing tab else: - base_url = external_system["base_url"] - - if not "as_url" in external_system: - logger.warning( - "External System connection -> %s needs an Application Server URL! Skipping to next external system...", - system_name, + logger.info( + "Add tab -> '%s' with app -> '%s' to channel -> 'General' of M365 Team -> '%s' ", + tab_name, + app_name, + group_name, ) + update = False # create new tab + + # 2. Determine the M365 Team ID. If the team is not found then skip: + response = self._m365.get_team(group_name) + team_id = self._m365.get_result_value(response, "id", 0) + if not team_id: + logger.error("M365 Team -> '%s' not found!", group_name) success = False continue - as_url = external_system["as_url"] - # Check if external system is reachable and - # update the payload dict with a "reachable" key/value pair: - self.check_external_system(external_system) + # 3. Install the App for the particular M365 Team (if it is not yet installed): + response = self._m365.get_teams_apps_of_team( + team_id=team_id, + filter_expression="contains(teamsAppDefinition/displayName, '{}')".format( + app_name + ), + ) + if self._m365.exist_result_item( + response=response, + key="displayName", + value=app_name, + sub_dict_name="teamsAppDefinition", + ): + logger.info( + "M365 Teams App -> '%s' is already installed for M365 Team -> '%s' (%s). Trying to upgrade app...", + app_name, + group_name, + team_id, + ) + response = self._m365.upgrade_teams_app_of_team(team_id, app_name) + if not response: + logger.error( + "Failed to upgrade the existing app -> '%s' for the M365 Team -> '%s'", + app_name, + group_name, + ) + success = False + continue + else: + logger.info( + "Install M365 Teams app -> '%s' for M365 team -> '%s'", + app_name, + group_name, + ) + response = self._m365.assign_teams_app_to_team( + team_id=team_id, app_id=app_internal_id + ) + if not response: + logger.error( + "Failed to install App -> '%s' (%s) for M365 Team -> '%s'", + app_name, + app_internal_id, + group_name, + ) + success = False + continue - # Read either username/password (BASIC) or client ID / secret (OAuth) - match auth_method: - case "BASIC": - if not "username" in external_system: - logger.warning( - "External System connection -> %s needs a user name for BASIC authentication! Skipping to next external system...", - system_name, - ) - continue - if not "password" in external_system: - logger.warning( - "External System connection -> %s needs a password for BASIC authentication! Skipping to next external system...", - system_name, - ) - continue - username = external_system["username"] - password = external_system["password"] - oauth_client_id = "" - oauth_client_secret = "" - - case "OAUTH": - if not "oauth_client_id" in external_system: - logger.error( - "External System connection -> %s is missing OAuth client ID! Skipping to next external system...", - system_name, - ) - success = False - continue - if not "oauth_client_secret" in external_system: - logger.error( - "External System connection -> %s is missing OAuth client secret! 
Skipping to next external system...", - system_name, - ) - success = False - continue - oauth_client_id = external_system["oauth_client_id"] - oauth_client_secret = external_system["oauth_client_secret"] - # For backward compatibility we also read username/password - # with OAuth settings: - username = ( - external_system["username"] - if external_system.get("username") - else None - ) - password = ( - external_system["password"] - if external_system.get("password") - else None - ) - case _: - logger.error( - "Unsupported authorization method specified (%s) , Skipping ... ", - auth_method, + # 4. Create a Tab in the "General" channel of the M365 Team: + if group_name == "Innovate": + # Use the Enterprise Workspace for the + # top-level group "Innovate": + node_id = 2000 + else: + # We assume the departmental group names are identical to + # top-level folders in the Enterprise volume + node = self._otcs.get_node_by_parent_and_name(2000, group_name) + node_id = self._otcs.get_result_value(node, "id") + if not node_id: + logger.warning( + "Cannot find a top-level container for group -> '%s'. Cannot configure M365 Teams app. Skipping...", + group_name, ) - return False + continue - # We do this existance test late in this function to make sure the payload - # datastructure is properly updated for debugging purposes. - logger.info( - "Test if external system -> %s does already exist...", system_name + app_url = ( + self._otcs_frontend.cs_support_public_url() + ) # it is important to use the frontend pod URL here + app_url += "/xecmoffice/teamsapp.html?nodeId=" + app_url += ( + str(node_id) + "&type=container&parentId=2000&target=content&csurl=" ) - if self._otcs.get_external_system_connection(system_name): + app_url += self._otcs_frontend.cs_public_url() + app_url += "&appId=" + app_internal_id + + if update: + # App / Tab exist but needs to be updated with new + # IDs for the new deployment of Extended ECM + # as the M365 Teams survive between Terrarium deployments: + logger.info( - "External System connection -> %s already exists!", - system_name, + "Updating tab -> '%s' of M365 Team channel -> 'General' for app -> '%s' (%s) with new URLs and node IDs", + tab_name, + app_name, + app_internal_id, ) - # This is for handling re-runs of customizer pod where the transports - # are skipped and thus self._sap or self._salesforce may not be initialized: - if system_type == "SAP" and not self._sap: - logger.info( - "Re-Initialize SAP connection for external system -> %s.", - system_name, - ) - # Initialize SAP object responsible for communication to SAP: - self._sap = self.init_sap(external_system) - if system_type == "Salesforce" and not self._salesforce: - logger.info( - "Re-Initialize Salesforce connection for external system -> %s.", - system_name, - ) - # Initialize Salesforce object responsible for communication to Salesforce: - self._salesforce = self.init_salesforce(external_system) - logger.info("Skip to next external system...") - continue - # - # Create External System: - # - logger.info( - "Create external system -> %s; type -> %s", system_name, connection_type - ) - response = self._otcs.add_external_system_connection( - connection_name=system_name, - connection_type=connection_type, - as_url=as_url, - base_url=base_url, - username=str(username), - password=str(password), - authentication_method=auth_method, - client_id=oauth_client_id, - client_secret=oauth_client_secret, - ) - if response is None: - logger.error( - "Failed to create external system -> %s; type -> %s", - system_name, - 
connection_type, + response = self._m365.update_teams_app_of_channel( + team_name=group_name, + channel_name="General", + tab_name=tab_name, + app_url=app_url, + cs_node_id=node_id, ) - success = False else: - logger.info("Successfully created external system -> %s", system_name) + # Tab does not exist in "General" channel so we + # add / configure it freshly: - # - # In case of an SAP external system we also initialize the SAP object - # and do some SAP-specific Archiving config: - # - if system_type == "SAP": - # Initialize SAP object responsible for communication to SAP: - self._sap = self.init_sap(external_system) - if ( - "archive_logical_name" in external_system - and "archive_certificate_file" in external_system - and self._otac - ): - logger.info( - "Put certificate file -> %s for logical archive -> %s into Archive Center", - external_system["archive_certificate_file"], - external_system["archive_logical_name"], - ) - response = self._otac.put_cert( - external_system["external_system_name"], - external_system["archive_logical_name"], - external_system["archive_certificate_file"], - ) - logger.info( - "Enable certificate file -> %s for logical archive -> %s", - external_system["archive_certificate_file"], - external_system["archive_logical_name"], - ) - response = self._otac.enable_cert( - external_system["external_system_name"], - external_system["archive_logical_name"], - True, - ) + logger.info( + "Adding tab -> '%s' with app -> '%s' (%s) in M365 Team channel -> 'General'", + tab_name, + app_name, + app_internal_id, + ) - # - # In case of an Salesforce external system we also initialize the Salesforce object - # - if system_type == "Salesforce": - # Initialize Salesforce object responsible for communication to Salesforce: - self._salesforce = self.init_salesforce(external_system) + response = self._m365.add_teams_app_to_channel( + team_name=group_name, + channel_name="General", + app_id=app_internal_id, + tab_name=tab_name, + app_url=app_url, + cs_node_id=node_id, + ) + if not response: + logger.error( + "Failed to add tab -> '%s' with app -> '%s' (%s) to M365 Team channel -> 'General'", + tab_name, + app_name, + app_internal_id, + ) - self.write_status_file(success, section_name, self._external_systems) + self.write_status_file(success, section_name, self._groups) return success - # end method definition + # end method definition - def process_transport_packages( - self, transport_packages: list, section_name: str = "transportPackages" - ) -> bool: - """Process transport packages in payload and import them to Extended ECM. + def cleanup_stale_teams_m365(self, workspace_types: list) -> bool: + """Delete Microsoft Teams that are left-overs from former deployments. + This method is currently not used. Args: - transport_packages (list): list of transport packages. As we - have three different lists (transport, - content_transport, transport_post) so - we need a parameter - section_name (str, optional): name of the section. It can be overridden - for cases where multiple sections of same type - are used (e.g. the "Post" sections like "transportPackagesPost") - This name is also used for the "success" status - files written to the Admin Personal Workspace + workspace_types (list): list of all workspace types Returns: - bool: True if payload has been processed without errors, False otherwise + bool: True if successful, False otherwise """ - if not transport_packages: - logger.info("Payload section -> %s is empty. 
Skipping...", section_name) - return True - - # If this payload section has been processed successfully before we - # can return True and skip processing it once more: - if self.check_status_file(section_name): - return True + if not isinstance(self._m365, M365): + logger.error( + "Microsoft 365 connection not setup properly. Skipping cleanup...", + ) + return False - success: bool = True + if workspace_types == []: + logger.error("Empty workspace type list!") + return False - for transport_package in transport_packages: - if not "name" in transport_package: + for workspace_type in workspace_types: + if not "name" in workspace_type: logger.error( - "Transport Package needs a name! Skipping to next transport..." + "Workspace type -> '%s' does not have a name. Skipping...", + workspace_type, ) - success = False continue - name = transport_package["name"] - - if "enabled" in transport_package and not transport_package["enabled"]: + response = self._otcs.get_workspace_instances(workspace_type["name"]) + workspace_instances = response["results"] + if not workspace_instances: logger.info( - "Payload for Transport Package -> %s is disabled. Skipping...", name + "Workspace type -> '%s' does not have any instances!", + workspace_type["name"], ) continue - - if not "url" in transport_package: - logger.error( - "Transport Package -> %s needs a URL! Skipping to next transport...", - name, + for workspace_instance in workspace_instances: + workspace_name = workspace_instance["data"]["properties"]["name"] + logger.info( + "Check if stale Microsoft 365 Teams with name -> '%s' exist...", + workspace_name, ) - success = False - continue - if not "description" in transport_package: - logger.warning("Transport Package -> %s is missing a description", name) - url = transport_package["url"] - description = transport_package["description"] - - # For some transports there can be string replacements - # configured: - if "replacements" in transport_package: - replacements = transport_package["replacements"] - else: - replacements = None - - # For some transports there can be data extractions - # configured: - if "extractions" in transport_package: - extractions = transport_package["extractions"] - else: - extractions = None - - logger.info("Deploy transport -> %s; URL -> %s", description, url) - if replacements: - logger.info("Use replacements -> %s", str(replacements)) - if extractions: - logger.info("Use extractions -> %s", str(extractions)) - - response = self._otcs.deploy_transport( - url, name, description, replacements, extractions - ) - if response is None: - logger.error("Failed to deploy transport -> %s; URL -> %s", name, url) - success = False - if self._stop_on_error: - break - else: - logger.info("Successfully deployed transport -> %s", name) - # Save the extractions for later processing, e.g. in process_business_object_types() - if extractions: - self.add_transport_extractions(extractions) - - self.write_status_file(success, section_name, transport_packages) - self.write_status_file( - success, section_name + "Extractions", self._transport_extractions - ) + response = self._m365.delete_teams(workspace_name) - return success + return True - # end method definition + # end method definition - def process_user_photos(self, section_name: str = "userPhotos") -> bool: - """Process user photos in payload and assign them to Extended ECM users. 
+ def cleanup_all_teams_m365(self, section_name: str = "teamsM365Cleanup") -> bool: + """Delete Microsoft Teams that are left-overs from former deployments Args: section_name (str, optional): name of the section. This name is used for the "success" status files written to the Admin Personal Workspace Returns: - bool: True if payload has been processed without errors, False otherwise + bool: True if teams have been deleted, False otherwise """ - if not self._users: - logger.info("Payload section -> %s is empty. Skipping...", section_name) - return True + if not isinstance(self._m365, M365): + logger.error( + "Microsoft 365 connection not setup properly. Skipping payload section -> '%s'...", + section_name, + ) + return False - # If this payload section has been processed successfully before we - # can return True and skip processing it once more: - if self.check_status_file(section_name): + # We want this cleanup to only run once even if we have + # multiple payload files - so we pass payload_specific=False here: + if self.check_status_file( + payload_section_name=section_name, payload_specific=False + ): + logger.info( + "Payload section -> '%s' has been processed successfully before. Skip cleanup of M365 teams...", + section_name, + ) return True - success: bool = True - - # we assume the nickname of the photo item equals the login name of the user - # we also assume that the photos have been uploaded / transported into the target system - for user in self._users: - user_name = user["name"] - - # Check if element has been disabled in payload (enabled = false). - # In this case we skip the element: - if "enabled" in user and not user["enabled"]: - logger.info( - "Payload for User -> %s is disabled. Skipping...", user_name - ) - continue + logger.info("Processing payload section -> '%s'...", section_name) - if not "id" in user: - logger.error( - "User -> %s does not have an ID. The user creation may have failed before. Skipping...", - user_name, - ) - success = False - continue + # We don't want to delete MS Teams that are matching the regular OTCS Group Names (like "Sales") + exception_list = self.get_all_group_names() - user_id = user["id"] + # These are the patterns that each MS Teams needs to match at least one of to be deleted + # Pattern 1: all MS teams with a name that has a number in brackets, like "(1234)" + # Pattern 2: all MS Teams with a name that starts with a number followed by a space, + # followed by a "-" and followed by another space + # Pattern 3: all MS Teams with a name that starts with "WS" and a 1-4 digit number + # (these are the workspaces for Purchase Contracts generated for Intelligent Filing) + # Pattern 4: all MS Teams with a name that ends with a 1-2 character + a number in brackets, like (US-1000) + # this is a specialization of pattern 1 + # Pattern 5: remove the teams that are created for the dummy copy&paste template for the + # Intelligent Filing workspaces + pattern_list = [ + r"\(\d+\)", + r"\d+\s-\s", + r"^WS\d{1,4}$", + r"^.+?\s\(.{1,2}-\d+\)$", + r"Purchase\sContract\s\(Template\sfor\sIntelligent\sFiling\)", + r"^OpenText.*$", + r"^P-100.*$", + r"^OILRIG.*$", + r"^AGILUM.*$", + r"^HD-102T.*$", + r"^SG325A.*$", + ] - response = self._otcs.get_node_from_nickname(user_name) - if response is None: - logger.warning( - "Missing photo for user -> %s - nickname not found. 
Skipping...", - user_name, - ) - continue - photo_id = self._otcs.get_result_value(response, "id") - response = self._otcs.update_user_photo(user_id, photo_id) - if not response: - logger.error("Failed to add photo for user -> %s", user_name) - success = False - else: - logger.info("Successfully added photo for user -> %s", user_name) + result = self._m365.delete_all_teams(exception_list, pattern_list) - # Check if Admin has a photo as well (nickname needs to be "admin"): - response = self._otcs.get_node_from_nickname("admin") - if response is None: - logger.warning("Missing photo for admin - nickname not found. Skipping...") - else: - photo_id = self._otcs.get_result_value(response, "id") - response = self._otcs.update_user_photo(1000, photo_id) - if response is None: - logger.warning("Failed to add photo for admin") - else: - logger.info("Successfully added photo for admin") + # We want this cleanup to only run once even if we have + # multiple payload files - so we pass payload_specific=False here: + self.write_status_file( + success=True, + payload_section_name=section_name, + payload_section=exception_list + pattern_list, + payload_specific=False, + ) - self.write_status_file(success, section_name, self._users) + return result - return success + # end method definition - # end method definition + def process_admin_settings( + self, admin_settings: list, section_name: str = "adminSettings" + ) -> bool: + """Process admin settings in payload and import them to Extended ECM. - def process_user_photos_m365(self, section_name: str = "userPhotosM365") -> bool: - """Process user photos in payload and assign them to Microsoft 365 users. + The payload section is a list of dicts with these items: + { + enabled: True or False to enable or disable the payload item + filename: The filename of the XML file with admin settings. + It needs to be the plain filename like "admin.xml". + The files reside inside the container in /settings root + directory. They are placed there by the Terraform automation + and are taken from the ./settings/payload directory. + description: Some description about the purpose of the settings. + Just for information and optional. + } Args: - section_name (str, optional): name of the section. + admin_settings (list): list of admin settings. We need this parameter + as we process two different lists. + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections like "adminSettingsPost") This name is also used for the "success" status files written to the Admin Personal Workspace Returns: - bool: True if payload has been processed without errors, False otherwise + bool: True if a restart of the OTCS pods is required. False otherwise. """ - if not isinstance(self._m365, M365): - logger.error( - "Office 365 connection not setup properly. Skipping payload section -> %s...", - section_name, - ) - return False - - if not self._users: - logger.info("Payload section -> %s is empty. Skipping...", section_name) - return True + if not admin_settings: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return False # important to return False here as otherwise we are triggering a restart of services!! 
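For the Teams cleanup above, `delete_all_teams()` receives the exception list (regular OTCS group names that must survive) and the regex pattern list. A minimal sketch of the matching semantics this implies (assumed behavior; the actual logic lives in the `M365` class):

```python
import re

def should_delete_team(team_name: str, exception_list: list, pattern_list: list) -> bool:
    """Return True if a team name qualifies for cleanup."""
    # Never delete teams that correspond to regular OTCS group names:
    if team_name in exception_list:
        return False
    # Delete if the name matches at least one of the cleanup patterns:
    return any(re.search(pattern, team_name) for pattern in pattern_list)

# Example: "Procurement (4711)" matches r"\(\d+\)" and would be deleted,
# while a plain group name like "Sales" stays untouched.
```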
# If this payload section has been processed successfully before we - # can return True and skip processing it once more: + # can return False and skip processing it once more: if self.check_status_file(section_name): - return True + return False # important to return False here as otherwise we are triggering a restart of services!! + restart_required: bool = False success: bool = True - # we assume the nickname of the photo item equals the login name of the user - # we also assume that the photos have been uploaded / transported into the target system - for user in self._users: - user_name = user["name"] + for admin_setting in admin_settings: + # Sanity checks: + if not "filename" in admin_setting: + logger.error("Filename is missing. Skipping to next admin setting...") + continue + filename = admin_setting["filename"] # Check if element has been disabled in payload (enabled = false). # In this case we skip the element: - if "enabled" in user and not user["enabled"]: + if "enabled" in admin_setting and not admin_setting["enabled"]: logger.info( - "Payload for User -> %s is disabled. Skipping...", user_name + "Payload for setting file -> '%s' is disabled. Skipping...", + filename, ) continue - if not "id" in user: - logger.error( - "User -> %s does not have an ID. The user creation may have failed before. Skipping...", - user_name, - ) - success = False - continue + settings_file = self._custom_settings_dir + filename + if os.path.exists(settings_file): + description = admin_setting.get("description") + if description: + logger.info(description) - if not "enable_o365" in user or not user["enable_o365"]: - logger.info( - "Microsoft 365 is not enabled in payload for User -> %s. Skipping...", - user_name, + # Read the config file: + with open(settings_file, "r", encoding="utf-8") as file: + file_content = file.read() + + logger.debug( + "Replace Placeholder -> '%s' in file -> %s", + self._placeholder_values, + file_content, ) - continue - # If the customizer pod is restarted it may be that - # the M365 user exists even if the M365 user ID is - # not yet written back into the payload. So we use the - # determine_user_id_m365() method that handles both cases - # and updates the payload if the user exists in M365 already. - user_m365_id = self.determine_user_id_m365(user) - if not user_m365_id: - logger.error("M365 user -> %s does not exist. Skipping...", user_name) - success = False - continue + file_content = self.replace_placeholders(file_content) - if self._m365.get_user_photo(user_m365_id, show_error=False): - logger.info( - "User -> %s (%s) has already a photo in Microsoft 365. Skipping...", - user_name, - user_m365_id, - ) - continue - else: - logger.info( - "User -> %s (%s) has not yet a photo in Microsoft 365. Uploading...", - user_name, - user_m365_id, - ) + # Write the updated config file: + tmpfile = "/tmp/" + os.path.basename(settings_file) + with open(tmpfile, "w", encoding="utf-8") as file: + file.write(file_content) - response = self._otcs.get_node_from_nickname(user_name) - if response is None: - logger.warning( - "Missing photo for user -> %s - nickname not found. 
Skipping...", - user_name, - ) - continue - photo_id = self._otcs.get_result_value(response, "id") - photo_name = self._otcs.get_result_value(response, "name") - photo_path = "/tmp/" + str(photo_name) - response = self._otcs.download_document(photo_id, photo_path) - if response is None: - logger.warning( - "Failed to download photo for user -> %s from Extended ECM", - user_name, - ) - success = False - continue + response = self._otcs.apply_config(tmpfile) + if response and response["results"]["data"]["restart"]: + logger.info("A restart of Extended ECM service is required.") + restart_required = True else: - logger.info( - "Successfully downloaded photo for user -> %s from Extended ECM to file -> %s", - user_name, - photo_path, - ) - - # Upload photo to M365: - response = self._m365.update_user_photo(user_m365_id, photo_path) - if response is None: - logger.error( - "Failed to upload photo for user -> %s to Microsoft 365", user_name - ) + logger.error("Admin settings file -> '%s' not found.", settings_file) success = False - else: - logger.info( - "Successfully uploaded photo for user -> %s to Microsoft 365", - user_name, - ) - # Check if Admin has a photo as well (nickname needs to be "admin") - # Then we want this to be applied in M365 as well: - response = self._otcs.get_node_from_nickname("admin") - if response is None: - logger.warning("Missing photo for admin - nickname not found. Skipping...") - else: - photo_id = self._otcs.get_result_value(response, "id") - photo_name = self._otcs.get_result_value(response, "name") - photo_path = "/tmp/" + str(photo_name) - response = self._otcs.download_document(photo_id, photo_path) - if response is None: - logger.warning( - "Failed to download photo for admin user from Extended ECM", - ) - success = False - else: - logger.info( - "Successfully downloaded photo for admin from Extended ECM to file -> %s", - photo_path, - ) - m365_admin_email = "admin@" + self._m365.config()["domain"] - response = self._m365.update_user_photo(m365_admin_email, photo_path) - if response is None: - logger.warning("Failed to add photo for %s", m365_admin_email) - else: - logger.info("Successfully added photo for %s", m365_admin_email) + self.write_status_file(success, section_name, admin_settings) - self.write_status_file(success, section_name, self._users) + return restart_required - return success + # end method definition - # end method definition + def check_external_system(self, external_system: dict) -> bool: + """Check if external system is reachable - def process_business_object_types( - self, section_name: str = "businessObjectTypes" - ) -> list: - """Create a data structure for all business object types in the Extended ECM system. + Args: + external_system (dict): payload data structure of external system. + We assume here that sanity check for + valid data is already done before. 
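The admin-settings flow that concluded above boils down to: read the XML settings file, substitute placeholders, stage the result under `/tmp`, apply it via `apply_config()`, and record whether OTCS requests a restart. A compact sketch under those assumptions (`replace_placeholders` stands in for the module's helper of the same name):

```python
import os

def apply_admin_setting(otcs, settings_file: str, replace_placeholders) -> bool:
    """Apply one XML admin settings file; return True if OTCS wants a restart."""
    with open(settings_file, "r", encoding="utf-8") as file:
        file_content = replace_placeholders(file.read())
    # Stage the resolved settings in /tmp before applying them:
    tmpfile = "/tmp/" + os.path.basename(settings_file)
    with open(tmpfile, "w", encoding="utf-8") as file:
        file.write(file_content)
    response = otcs.apply_config(tmpfile)
    # OTCS reports in the response whether a service restart is needed:
    return bool(response and response["results"]["data"]["restart"])
```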
+
+        Returns:
+            bool: True = system is reachable, False otherwise
+        """
+
+        as_url = external_system["as_url"]
+
+        # Extract the hostname:
+        external_system_hostname = urlparse(as_url).hostname
+        # Write this information back into the data structure:
+        external_system["external_system_hostname"] = external_system_hostname
+        # Extract the port:
+        external_system_port = urlparse(as_url).port if urlparse(as_url).port else 80
+        # Write this information back into the data structure:
+        external_system["external_system_port"] = external_system_port
+
+        if self._http_object.check_host_reachable(
+            external_system_hostname, external_system_port
+        ):
+            logger.info(
+                "Mark external system -> '%s' as reachable for later workspace creation and user synchronization...",
+                external_system["external_system_name"],
+            )
+            external_system["reachable"] = True
+            return True
+        else:
+            external_system["reachable"] = False
+            return False
+
+    # end method definition
+
+    def process_external_systems(self, section_name: str = "externalSystems") -> bool:
+        """Process external systems in payload and create them in Extended ECM.
+
+        The payload section is a list of dicts (each representing one external
+        system) with these items:
+        {
+            enabled: True or False to enable or disable the payload item
+            external_system_name: Name of the external system.
+            external_system_type: Type of the external system.
+                                  Possible values are
+                                  * SAP
+                                  * SuccessFactors
+                                  * Salesforce
+                                  * AppWorks Platform
+                                  * Business Scenario Sample
+            base_url: Base URL of the external system
+            as_url: Application Server URL of the external system
+            username: (Technical) User Name for the connection
+            password: Password of the (technical) user
+            oauth_client_id: OAuth client ID
+            oauth_client_secret: OAuth client secret
+            archive_logical_name: Logical name of Archive for SAP
+            archive_certificate_file: Path and filename to certificate file.
+                                      This file is inside the customizer
+                                      pod file system.
+            skip_connection_test: Should we skip the connection test for this
+                                  external system?
+        }
+        If OAuth client ID and client secret are provided then username
+        and password are no longer used.
+
+        In the payload for SAP external systems there are additional
+        items "client" and "destination" that are processed by init_sap().

        Args:
            section_name (str, optional): name of the section.
                                          This name is used for the "success" status
                                          files written to the Admin Personal Workspace
        Returns:
-            list: list of business object types. Each list element is a dict with these values:
-            - id (str)
-            - name (str)
-            - type (str)
-            - ext_system_id (str)
-            - business_properties (list)
-            - business_property_groups (list)
+            bool: True if payload has been processed without errors, False otherwise
+        Side Effects:
            - based on system_type different other settings in the dict are set
            - reachability is tested and a flag is set in the payload dict
        """

+        if not self._external_systems:
+            logger.info("Payload section -> '%s' is empty. Skipping...", section_name)
+            return True
+
+        # WE DON'T WANT TO DO THIS AS WE NEED TO INITIALIZE
+        # DATASTRUCTURES LIKE self._sap and self._salesforce!!
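`check_external_system()` above derives host and port with `urllib.parse.urlparse` and delegates the probe to `self._http_object.check_host_reachable()`. A self-contained equivalent using a plain TCP socket (an assumption about what that helper does under the hood):

```python
import socket
from urllib.parse import urlparse

def is_reachable(as_url: str, timeout: float = 5.0) -> bool:
    """Check whether the application server behind as_url accepts connections."""
    parsed = urlparse(as_url)
    hostname = parsed.hostname
    port = parsed.port if parsed.port else 80  # same port default as above
    try:
        with socket.create_connection((hostname, port), timeout=timeout):
            return True
    except OSError:
        return False
```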
# If this payload section has been processed successfully before we - # still need to read the data structure from the status file and - # initialize self._workspace_types: - if self.check_status_file(section_name): - # read the list from the json file in admin Home - # this is important for restart of customizer pod - # as this data structure is used later on for workspace processing - logger.info( - "Re-Initialize business object types list from status file -> %s for later use...", - self.get_status_file_name(payload_section_name=section_name), - ) - self._business_object_types = self.get_status_file(section_name) - logger.info( - "Found -> %s business object types.", - str(len(self._business_object_types)), - ) - logger.debug( - "Business object types -> %s", str(self._business_object_types) - ) - return self._business_object_types + # can return True and skip processing it once more: + # if self.check_status_file(section_name): + # return True success: bool = True - # get all workspace types (these have been created by the transports and are not in the payload!) - # we need to do this each time as it needs to work across potential multiple payload files... - response = self._otcs.get_business_object_types() - if response is None: - logger.info("No business object types found!") - self._business_object_types = [] - else: - self._business_object_types = response["results"] - logger.info( - "Found -> %s business object types.", - str(len(self._business_object_types)), - ) - logger.debug( - "Business object types -> %s", str(self._business_object_types) - ) - - # now we enrich the workspace_type list elments (which are dicts) - # with additional dict elements for further processing: - for business_object_type in self._business_object_types: - # Get BO Type (e.g. KNA1): - bo_type = business_object_type["data"]["properties"]["bo_type"] - logger.info("Business Object Type -> %s", bo_type) - business_object_type["type"] = bo_type - # Get BO Type ID: - bo_type_id = business_object_type["data"]["properties"]["bo_type_id"] - logger.info("Business Object Type ID -> %s", bo_type_id) - business_object_type["id"] = bo_type_id - # Get BO Type Name: - bo_type_name = business_object_type["data"]["properties"]["bo_type_name"] - logger.info("Business Object Type Name -> %s", bo_type_name) - business_object_type["name"] = bo_type_name - # Get External System ID: - ext_system_id = business_object_type["data"]["properties"]["ext_system_id"] - logger.info("External System ID -> %s", ext_system_id) - business_object_type["ext_system_id"] = ext_system_id - - # Get additional information per BO Type (this REST API is severly - # limited) - it does not return Property names from External System - # and is also missing Business Property Groups: - # if not "/" in bo_type: - # response = self._otcs.get_business_object_type( - # external_system_id=ext_system_id, type_name=bo_type - # ) - # if response is None or not response["results"]: - # logger.warning( - # "Cannot retrieve additional information for business object type -> %s. 
Skipping...",
-            #         bo_type,
-            #     )
-            #     continue
-            #     business_properties = response["results"]["data"][
-            #         "business_object_type"
-            #     ]["data"]["businessProperties"]
-            #     business_object_type["business_properties"] = business_properties
-            # else:
-            #     logger.warning(
-            #         "Business Object Type -> %s does not have a proper name to call REST API.",
-            #         bo_type,
-            #     )
-            #     business_object_type["business_properties"] = []
+        for external_system in self._external_systems:
+            #
+            # 1: Do sanity checks for the payload:
+            #
+            if not "external_system_name" in external_system:
+                logger.error(
+                    "External System connection needs a logical system name! Skipping to next external system..."
+                )
+                success = False
+                continue
+            system_name = external_system["external_system_name"]

-            business_object_type["business_properties"] = []
-            business_object_type["business_property_groups"] = []
+            if not "external_system_type" in external_system:
+                logger.error(
+                    "External System connection -> '%s' needs a type (SAP, Salesforce, SuccessFactors, AppWorks Platform)! Skipping to next external system...",
+                    system_name,
+                )
+                success = False
+                continue
+            system_type = external_system["external_system_type"]

-        # Now we complete the data with what we have extracted from the transport packages
-        # for Business Object Types. This is a workaround for the insufficient REST API
-        # implementation (see otcs.get_business_object_type)
-        if self._transport_extractions:
+            if "enabled" in external_system and not external_system["enabled"]:
                logger.info(
-                "Enrich Business Object Types with data extractions from transport packages (found %s extractions)...",
-                str(len(self._transport_extractions)),
+                    "Payload for External System -> '%s' (%s) is disabled. Skipping...",
+                    system_name,
+                    system_type,
                )
+                continue
+
+            # Possible Connection Types for external systems:
+            # "Business Scenario Sample" (Business Scenarios Sample Adapter)
+            # "ot.sap.c4c.SpiAdapter" (SAP C4C SPI Adapter)
+            # "ot.sap.c4c.SpiAdapterV2" (C4C SPI Adapter V2)
+            # "HTTP" (Default WebService Adapter)
+            # "ot.sap.S4HANAAdapter" (S/4HANA SPI Adapter)
+            # "SF" (SalesForce Adapter)
+            # "SFInstance" (SFWebService)
+
+            # Set the default settings for the different system types:
+            match system_type:
+                # Check if we have a SuccessFactors system:
+                case "SuccessFactors":
+                    connection_type = "SFInstance"
+                    auth_method = "OAUTH"
+                    username = None
+                    password = None
+                case "SAP":
+                    connection_type = "HTTP"
+                    auth_method = "BASIC"
+                    oauth_client_id = None
+                    oauth_client_secret = None
+                case "Salesforce":
+                    connection_type = "SF"
+                    auth_method = "OAUTH"
+                    username = None
+                    password = None
+                case "AppWorks Platform":
+                    connection_type = "HTTP"
+                    auth_method = "BASIC"
+                    oauth_client_id = None
+                    oauth_client_secret = None
+                case "Business Scenario Sample":
+                    connection_type = "Business Scenario Sample"
+                    auth_method = "BASIC"
+                    oauth_client_id = None
+                    oauth_client_secret = None
+                case _:
+                    logger.error("Unsupported system_type defined -> '%s'", system_type)
+                    return False
+
+            if not "base_url" in external_system:
+                base_url = "" # baseUrl is optional
            else:
-                logger.info(
-                    "No transport extractions are recorded. This may be because of customizer restart."
+                base_url = external_system["base_url"]
+
+            if not "as_url" in external_system:
+                logger.warning(
+                    "External System connection -> '%s' needs an Application Server URL! 
Skipping to next external system...", + system_name, ) - extraction_status_file = "transportPackagesExtractions" - if self.check_status_file(extraction_status_file): - logger.info( - "Try to load extractions from success file -> %s...", - extraction_status_file, - ) - self._transport_extractions = self.get_status_file( - extraction_status_file - ) + success = False + continue + as_url = external_system["as_url"] - for extraction in self._transport_extractions: - xpath = extraction.get("data") - data_list = extraction.get("data") - if not data_list: - logger.error( - "Extraction -> %s is missing the data element. Skipping...", - xpath, - ) - success = False - continue - if not isinstance(data_list, list): + logger.info( + "Processing external system -> '%s' (type -> '%s', connection type -> '%s', endpoint -> '%s')", + system_name, + system_type, + connection_type, + as_url, + ) + + skip_connection_test = external_system.get("skip_connection_test", False) + # If skip_connection_test is False, run the external system check + if not skip_connection_test: + # Check if external system is reachable and + # update the payload dict with a "reachable" key/value pair: + if not self.check_external_system(external_system): logger.warning( - "Extracted data for -> %s is not a list. Cannot process it. Skipping...", - xpath, + "External System connection -> '%s' (%s) is not reachable! Skipping to next external system...", + system_name, + system_type, ) + success = False continue + else: + logger.info( + "skip_connection_test is %s; Skipping external system check for %s...", + skip_connection_test, + system_name) - # The following loop processes a dictionasry of this structure: - - # llnode: { - # '@created': '2017-11-23T16:43:35', - # '@createdby': '1000', - # '@createdbyname': 'Terrarium Admin', - # '@description': '', - # '@id': '16013', - # '@modified': '2023-12-09T12:08:21', - # '@name': 'SFDC Order', - # '@objname': 'Business Object Type', - # '@objtype': '889', - # '@ownedby': '1000', - # '@ownedbyname': 'Terrarium Admin', - # '@parentguid': '95F96645-057D-4EAF-9083-BE9F24C0CB6C', - # '@parentid': '2898', - # '@parentname': 'Business Object Types', - # ... - # 'Nickname': {'@domain': ''}, - # 'name': {'@xml:lang': 'en', '#text': 'SFDC Order'}, - # 'description': {'@xml:lang': 'en'}, - # 'businessObjectTypeInfo': { - # 'basicInfo': { - # '@businessObjectId': '9', - # '@businessobjectType': 'Order', - # '@deleted': 'false', - # '@name': 'SFDC Order', - # '@subtype': '889', - # '@useBusWorkspace': 'true', - # 'displayUrl': {...} - # }, - # 'businessApplication': { - # 'businessObjectTypeReference': {...}}, - # 'businessAttachmentInfo': { - # '@automaticAddingOfBusinessObject': 'false', - # '@canbeAddedAsBusinessObject': 'false', - # '@enableBADIBeforeAddingBO': 'false', - # '@enableBADIBeforeRemovingBO': 'false', - # '@enableMetadataMapping': 'false' - # }, - # 'managedObjectTypes': { - # 'managedObjectType': [...] - # }, - # 'multilingualNames': {'language': [...]}, - # 'callbacks': {'callback': [...]}, - # 'workspaceTypeReference': {'@isDefaultDisplay': 'false', '@isDefaultSearch': 'false', 'businessObjectTypeReference': {...}}, - # 'businessPropertyMappings': { - # 'propertyMapping': [...] - # }, - # 'businessPropertyGroupMappings': { - # 'propertyGroupMapping': [...] - # }, - # 'documentTypes': { - # 'documentType': [...] 
- # }, - # 'CustomBOTypeInfo': None - # } - # } - - for data in data_list: - # - # Level 1: llnode - # - llnode = data.get("llnode") - if not llnode: - logger.error("Missing llnode structure in data. Skipping...") - success = False + # Read either username/password (BASIC) or client ID / secret (OAuth) + match auth_method: + case "BASIC": + if not "username" in external_system: + logger.warning( + "External System connection -> '%s' needs a user name for BASIC authentication! Skipping to next external system...", + system_name, + ) continue - - # - # Level 2: businessobjectTypeInfo - # - business_object_type_info = llnode.get( - "businessobjectTypeInfo", None - ) - if not business_object_type_info: - logger.error( - "Information is missing for Business Object Type -> %s. Skipping...", - bo_type_name, + if not "password" in external_system: + logger.warning( + "External System connection -> '%s' needs a password for BASIC authentication! Skipping to next external system...", + system_name, ) - success = False continue + username = external_system["username"] + password = external_system["password"] + oauth_client_id = "" + oauth_client_secret = "" - # Check if this extraction is for the current business object type: - basic_info = business_object_type_info.get("basicInfo", None) - if not basic_info: + case "OAUTH": + if not "oauth_client_id" in external_system: logger.error( - "Cannot find Basic Info of Business Object Type -> %s. Skipping...", - bo_type_name, + "External System connection -> '%s' is missing OAuth client ID! Skipping to next external system...", + system_name, ) success = False continue - name = basic_info.get("@businessobjectType", "") - if not name: + if not "oauth_client_secret" in external_system: logger.error( - "Cannot find name of Business Object Type -> %s. Skipping...", - bo_type_name, + "External System connection -> '%s' is missing OAuth client secret! Skipping to next external system...", + system_name, ) success = False continue - obj_type = llnode.get("@objtype", None) - # we need to compare bo_type and NOT bo_type_name here! - # Otherwise we don't find the SAP and SuccessFactors data: - if name != bo_type or obj_type != "889": - continue - - # - # Level 3: businessPropertyMappings - plain, non-grouped properties - # - business_property_mappings = business_object_type_info.get( - "businessPropertyMappings", None + oauth_client_id = external_system["oauth_client_id"] + oauth_client_secret = external_system["oauth_client_secret"] + # For backward compatibility we also read username/password + # with OAuth settings: + username = ( + external_system["username"] + if external_system.get("username") + else None ) - if not business_property_mappings: - logger.info( - "No Property Mapping for Business Object Type -> %s. Skipping...", - bo_type_name, - ) - else: - property_mappings = business_property_mappings.get( - "propertyMapping", [] - ) - # This can happen if there's only 1 propertyMapping; - if not isinstance(property_mappings, list): - logger.info( - "Found a single property mapping in a dictionary (not in a list). 
Package it into a list...",
-                    )
-                    property_mappings = [property_mappings]
-
-                for property_mapping in property_mappings:
-                    property_name = property_mapping.get("@propertyName")
-                    attribute_name = property_mapping.get("@attributeName")
-                    category_id = property_mapping.get("@categoryId")
-                    mapping_type = property_mapping.get("@type")
-                    logger.info(
-                        "%s Property Mapping for Business Object -> %s: property -> %s is mapped to attribute -> %s (category -> %s)",
-                        mapping_type,
-                        bo_type_name,
-                        property_name,
-                        attribute_name,
-                        category_id,
-                    )
-                    business_object_type["business_properties"].append(
-                        property_mapping
-                    )
-
-                #
-                # Level 3: businessPropertyGroupMappings - grouped properties
-                #
-                business_property_group_mappings = business_object_type_info.get(
-                    "businessPropertyGroupMappings", None
+                    oauth_client_id = external_system["oauth_client_id"]
+                    oauth_client_secret = external_system["oauth_client_secret"]
+                    # For backward compatibility we also read username/password
+                    # with OAuth settings:
+                    username = (
+                        external_system["username"]
+                        if external_system.get("username")
+                        else None
                    )
-                if not business_property_group_mappings:
-                    logger.info(
-                        "No Property Group Mapping for Business Object Type -> %s. Skipping...",
-                        bo_type_name,
-                    )
-                    continue
+                    password = (
+                        external_system["password"]
+                        if external_system.get("password")
+                        else None
                    )
+                case _:
+                    logger.error(
+                        "Unsupported authorization method specified -> '%s'. Skipping...",
+                        auth_method,
+                    )
+                    return False
+            # end match
-                property_group_mappings = business_property_group_mappings.get(
-                    "propertyGroupMapping", []
+            # We do this existence test late in this function to make sure the payload
+            # data structure is properly updated for debugging purposes.
+            logger.info(
+                "Test if external system -> '%s' does already exist...", system_name
+            )
+            if self._otcs.get_external_system_connection(system_name):
+                logger.info(
+                    "External System connection -> '%s' already exists!",
+                    system_name,
+                )
+                # This is for handling re-runs of customizer pod where the transports
+                # are skipped and thus self._sap or self._salesforce may not be initialized:
+                if system_type == "SAP" and not self._sap:
+                    logger.info(
+                        "Re-Initialize SAP connection for external system -> '%s'.",
+                        system_name,
                    )
-                # This can happen if there's only 1 propertyMapping;
-                if isinstance(property_group_mappings, dict):
-                    logger.info(
-                        "Found a single property group mapping in a dictionary (not in a list). 
Pack it into a list...", - ) - property_group_mappings = [property_group_mappings] + # Initialize SAP object responsible for communication to SAP: + self._sap = self.init_sap(external_system) + if system_type == "Salesforce" and not self._salesforce: + logger.info( + "Re-Initialize Salesforce connection for external system -> '%s'.", + system_name, + ) + # Initialize Salesforce object responsible for communication to Salesforce: + self._salesforce = self.init_salesforce(external_system) + if system_type == "SuccessFactors" and not self._successfactors: + logger.info( + "Re-Initialize SuccessFactors connection for external system -> '%s'.", + system_name, + ) + # Initialize SuccessFactors object responsible for communication to SuccessFactors: + self._successfactors = self.init_successfactors(external_system) - for property_group_mapping in property_group_mappings: - group_name = property_group_mapping.get("@groupName") - set_name = property_group_mapping.get("@setName") - category_id = property_group_mapping.get("@categoryId") - mapping_type = property_group_mapping.get("@type") - logger.info( - "%s Property Group Mapping for Business Object -> %s: group -> %s is mapped to set -> %s (category -> %s)", - mapping_type, - bo_type_name, - group_name, - set_name, - category_id, - ) + logger.info("Skip to next external system...") + continue - property_mappings = property_group_mapping.get( - "propertyMapping", [] - ) - # This can happen if there's only 1 propertyMapping; - if not isinstance(property_mappings, list): - logger.info( - "Found a single property mapping in a dictionary (not in a list). Package it into a list...", - ) - property_mappings = [property_mappings] + # + # Create External System: + # - for property_mapping in property_mappings: - # for nested mappings we only have 2 fields - the rest is on the group level - see above - property_name = property_mapping.get("@propertyName") - attribute_name = property_mapping.get("@attributeName") - logger.info( - "%s Property Mapping inside group for Business Object -> %s: group -> %s, property -> %s is mapped to set -> %s, attribute -> %s (category -> %s)", - mapping_type, - bo_type_name, - group_name, - property_name, - set_name, - attribute_name, - category_id, - ) - # we write the group / set information also in the property mapping - # tp have a plain list with all information: - property_mapping["@groupName"] = group_name - property_mapping["@setName"] = set_name - property_mapping["@type"] = mapping_type - business_object_type["business_property_groups"].append( - property_mapping - ) - - self.write_status_file(success, section_name, self._business_object_types) - - return self._business_object_types - - # end method definition - - def get_business_object_properties(self, bo_type_name: str) -> dict: - """Get a dictionary with all property mapping of a business object type. - We contruct this dictionary from the two lists for the given - business object types (property mapping and property group mappings) - These two lists have been created before by process_business_object_types() - - This method is used for creation of business objects in Salesforce. - - Args: - bo_type_name (str): Name of the business object type - - Returns: - dict: dictionary with keys that are either the attribute name or - a key that is contructed like this: st name + "-" + attribute name. - This allows for an easy lookup in mthods that have access to - the category data of business workspaces. 
- """ - - # Find the matching business object type: - business_object_type = next( - ( - item - for item in self._business_object_types - if item["name"] == bo_type_name - ), - None, - ) - if not business_object_type: - return None + response = self._otcs.add_external_system_connection( + connection_name=system_name, + connection_type=connection_type, + as_url=as_url, + base_url=base_url, + username=str(username), + password=str(password), + authentication_method=auth_method, + client_id=oauth_client_id, + client_secret=oauth_client_secret, + ) + if response is None: + logger.error( + "Failed to create external system -> '%s'; type -> '%s'", + system_name, + connection_type, + ) + success = False + else: + logger.info("Successfully created external system -> '%s'", system_name) - business_properties = business_object_type.get("business_properties") - business_property_groups = business_object_type.get("business_property_groups") + # + # In case of an SAP external system we also initialize the SAP object + # + if system_type == "SAP": + # Initialize SAP object responsible for communication to SAP: + self._sap = self.init_sap(sap_external_system=external_system) - lookup_dict = {} + # + # In case of an SuccessFactors external system we also initialize the SuccessFactors object + # + if system_type == "SuccessFactors": + # Initialize SuccessFactors object responsible for communication to SuccessFactors: + self._successfactors = self.init_successfactors( + sucessfactors_external_system=external_system + ) - for mapping in business_properties: - attribute_name = mapping.get("@attributeName") - lookup_dict[attribute_name] = mapping + # + # In case of an Salesforce external system we also initialize the Salesforce object + # + if system_type == "Salesforce": + # Initialize Salesforce object responsible for communication to Salesforce: + self._salesforce = self.init_salesforce( + salesforce_external_system=external_system + ) - for mapping in business_property_groups: - set_name = mapping.get("@setName") - attribute_name = mapping.get("@attributeName") - lookup_dict[set_name + "-" + attribute_name] = mapping + self.write_status_file(success, section_name, self._external_systems) - return lookup_dict + return success - # end method definition + # end method definition - def process_workspace_types(self, section_name: str = "workspaceTypes") -> list: - """Create a data structure for all workspace types in the Extended ECM system. + def process_transport_packages( + self, transport_packages: list, section_name: str = "transportPackages" + ) -> bool: + """Process transport packages in payload and import them to Extended ECM. Args: - section_name (str, optional): name of the section. - This name is used for the "success" status + transport_packages (list): list of transport packages. As we + have three different lists (transport, + content_transport, transport_post) so + we need a parameter + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections like "transportPackagesPost") + This name is also used for the "success" status files written to the Admin Personal Workspace Returns: - list: list of workspace types. Each list element is a dict with these values: - - id (str) - - name (str) - - templates (list) - + name (str) - + id + bool: True if payload has been processed without errors, False otherwise """ + if not transport_packages: + logger.info("Payload section -> '%s' is empty. 
Skipping...", section_name) + return True + # If this payload section has been processed successfully before we - # still need to read the data structure from the status file and - # initialize self._workspace_types: + # can return True and skip processing it once more: if self.check_status_file(section_name): - # read the list from the json file in admin Home - # this is important for restart of customizer pod - # as this data structure is used later on for workspace processing - logger.info( - "Re-Initialize workspace types list from status file -> %s for later use...", - self.get_status_file_name(payload_section_name=section_name), - ) - self._workspace_types = self.get_status_file(section_name) - logger.info("Found -> %s workspace types.", str(len(self._workspace_types))) - logger.debug("Workspace types -> %s", str(self._workspace_types)) - return self._workspace_types + return True - # get all workspace types (these have been created by the transports and are not in the payload!) - # we need to do this each time as it needs to work across potential multiple payload files... - response = self._otcs.get_workspace_types() - if response is None: - logger.error("No workspace types found!") - self._workspace_types = [] - else: - self._workspace_types = response["results"] - logger.info("Found -> %s workspace types.", str(len(self._workspace_types))) - logger.debug("Workspace types -> %s", str(self._workspace_types)) + success: bool = True - # now we enrich the workspace_type list elments (which are dicts) - # with additional dict elements for further processing: - for workspace_type in self._workspace_types: - workspace_type_id = workspace_type["data"]["properties"]["wksp_type_id"] - logger.info("Workspace Type ID -> %s", workspace_type_id) - workspace_type["id"] = workspace_type_id - workspace_type_name = workspace_type["data"]["properties"]["wksp_type_name"] - logger.info("Workspace Type Name -> %s", workspace_type_name) - workspace_type["name"] = workspace_type_name - workspace_templates = workspace_type["data"]["properties"]["templates"] - # Create empty lists of dicts with template names and node IDs: - workspace_type["templates"] = [] - if workspace_templates: - # Determine available templates per workspace type (there can be multiple!) - for workspace_template in workspace_templates: - workspace_template_id = workspace_template["id"] - workspace_template_name = workspace_template["name"] - logger.info( - "Found workspace template with name -> %s and ID -> %s", - workspace_template_name, - workspace_template_id, - ) - template = { - "name": workspace_template_name, - "id": workspace_template_id, - } - workspace_type["templates"].append(template) + for transport_package in transport_packages: + if not "name" in transport_package: + logger.error( + "Transport Package needs a name! Skipping to next transport..." 
+ ) + success = False + continue + name = transport_package["name"] - # Workaround for problem with workspace role inheritance - # which may be related to Transport or REST API: to work-around this we - # push down the workspace roles to the workspace folders explicitly: - response = self._otcs.get_workspace_roles(workspace_template_id) - - for roles in response["results"]: - role_name = roles["data"]["properties"]["name"] - role_id = roles["data"]["properties"]["id"] - permissions = roles["data"]["properties"]["perms"] - # as get_workspace_roles() delivers permissions as a value (bit encoded) - # we need to convert it to a permissions string list: - permission_string_list = ( - self._otcs.convert_permission_value_to_permission_string( - permissions - ) - ) + if "enabled" in transport_package and not transport_package["enabled"]: + logger.info( + "Payload for Transport Package -> '%s' is disabled. Skipping...", + name, + ) + continue - logger.info( - "Inherit permissions of workspace template -> %s and role -> %s to workspace folders...", - workspace_template_name, - role_name, - ) + if not "url" in transport_package: + logger.error( + "Transport Package -> '%s' needs a URL! Skipping to next transport...", + name, + ) + success = False + continue + url = transport_package["url"] - # Inherit permissions to folders of workspace template: - response = self._otcs.assign_workspace_permissions( - workspace_template_id, - role_id, - permission_string_list, - 1, # Only sub items - workspace node itself is OK - ) + if not "description" in transport_package: + logger.warning( + "Transport Package -> '%s' is missing a description", name + ) + description = transport_package.get("description", "") + # For some transports there can be string replacements + # configured: + if "replacements" in transport_package: + replacements = transport_package["replacements"] + logger.info( + "Transport -> '%s' has replacements -> %s", name, str(replacements) + ) else: - logger.warning( - "Workspace Types Name -> %s has no templates!", workspace_type_name + replacements = None + + # For some transports there can be data extractions + # configured: + if "extractions" in transport_package: + extractions = transport_package["extractions"] + logger.info( + "Transport -> '%s' has extractions -> %s", name, str(extractions) ) - continue + else: + extractions = None - self.write_status_file(True, section_name, self._workspace_types) + if description: + logger.info("Deploy transport -> '%s' ('%s')", name, description) + else: + logger.info("Deploy transport -> '%s'", name) - return self._workspace_types + response = self._otcs.deploy_transport( + url, name, description, replacements, extractions + ) + if response is None: + logger.error("Failed to deploy transport -> '%s'", name) + success = False + if self._stop_on_error: + break + else: + logger.info("Successfully deployed transport -> '%s'", name) + # Save the extractions for later processing, e.g. in process_business_object_types() + if extractions: + self.add_transport_extractions(extractions) + # end for transports - # end method definition + self.write_status_file(success, section_name, transport_packages) + self.write_status_file( + success, section_name + "Extractions", self._transport_extractions + ) - def process_workspace_templates( - self, section_name: str = "workspaceTemplates" - ) -> bool: - """Process Workspace Template playload. This allows to define role members on - template basis. 
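For reference, a payload element as consumed by the transport loop above could look like the following (all values are illustrative; only `name` and `url` are mandatory per the checks above, and the inner shapes of `replacements` and `extractions` are assumptions for this sketch):

```python
transport_package = {
    "enabled": True,
    "name": "070-business-scenarios",
    "description": "Business scenario configuration",
    "url": "https://example.com/transports/070-business-scenarios.zip",
    # Optional string replacements applied while deploying the transport:
    "replacements": [{"placeholder": "<OTCS_HOST>", "value": "otcs.example.com"}],
    # Optional data extractions, kept for later use in
    # process_business_object_types():
    "extractions": [{"enabled": True, "xpath": "//llnode[@objtype='889']"}],
}
```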
This avoids having to "pollute" workspace template templates - with user or group information and instead controls this via payload. + return success + + # end method definition + + def process_user_photos(self, section_name: str = "userPhotos") -> bool: + """Process user photos in payload and assign them to Extended ECM users. Args: section_name (str, optional): name of the section. @@ -4996,6 +6423,10 @@ def process_workspace_templates( bool: True if payload has been processed without errors, False otherwise """ + if not self._users: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True + # If this payload section has been processed successfully before we # can return True and skip processing it once more: if self.check_status_file(section_name): @@ -5003,334 +6434,1387 @@ def process_workspace_templates( success: bool = True - for workspace_template in self._workspace_templates: - # Read Workspace Type Name from payload: - if not "type_name" in workspace_template: - logger.error( - "Workspace template needs a type name! Skipping to next workspace template...", - ) - success = False - continue - type_name = workspace_template["type_name"] + # we assume the nickname of the photo item equals the login name of the user + # we also assume that the photos have been uploaded / transported into the target system + for user in self._users: + user_name = user["name"] # Check if element has been disabled in payload (enabled = false). # In this case we skip the element: - if "enabled" in workspace_template and not workspace_template["enabled"]: + if "enabled" in user and not user["enabled"]: logger.info( - "Payload for Workspace Template -> %s is disabled. Skipping to next workspace template...", - type_name, + "Payload for User -> '%s' is disabled. Skipping...", user_name ) continue - # Read Workspace Template Name from payload: - if not "template_name" in workspace_template: + if not "id" in user: logger.error( - "Workspace Template for Workspace Type -> %s needs a template name! Skipping to next workspace template...", - type_name, + "User -> '%s' does not have an ID. The user creation may have failed before. Skipping...", + user_name, ) success = False continue - template_name = workspace_template["template_name"] - # Read members from payload: - if not "members" in workspace_template: - logger.info( - "Workspace template with type -> %s and name -> %s has no members in payload. Skipping to next workspace...", - type_name, - template_name, + user_id = user["id"] + + response = self._otcs.get_node_from_nickname(user_name) + if response is None: + logger.warning( + "Missing photo for user -> '%s' - nickname not found. Skipping...", + user_name, ) continue - members = workspace_template["members"] + photo_id = self._otcs.get_result_value(response, "id") + response = self._otcs.update_user_photo(user_id, photo_id) + if not response: + logger.error("Failed to add photo for user -> '%s'", user_name) + success = False + else: + logger.info("Successfully added photo for user -> '%s'", user_name) - # Find the workspace type with the name given in the _workspace_types - # datastructure that has been generated by process_workspace_types() method before: - workspace_type = next( - (item for item in self._workspace_types if item["name"] == type_name), - None, + # Check if Admin has a photo as well (nickname needs to be "admin"): + response = self._otcs.get_node_from_nickname("admin") + if response is None: + logger.warning("Missing photo for admin - nickname not found. 
Skipping...") + else: + photo_id = self._otcs.get_result_value(response, "id") + response = self._otcs.update_user_photo(1000, photo_id) + if response is None: + logger.warning("Failed to add photo for admin") + else: + logger.info("Successfully added photo for admin") + + self.write_status_file(success, section_name, self._users) + + return success + + # end method definition + + def process_user_photos_m365(self, section_name: str = "userPhotosM365") -> bool: + """Process user photos in payload and assign them to Microsoft 365 users. + + Args: + section_name (str, optional): name of the section. + This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + """ + + if not isinstance(self._m365, M365): + logger.error( + "Microsoft 365 connection not setup properly. Skipping payload section -> '%s'...", + section_name, ) - if workspace_type is None: + return False + + if not self._users: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True + + success: bool = True + + # we assume the nickname of the photo item equals the login name of the user + # we also assume that the photos have been uploaded / transported into the target system + for user in self._users: + user_name = user["name"] + + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in user and not user["enabled"]: + logger.info( + "Payload for User -> '%s' is disabled. Skipping...", user_name + ) + continue + + if not "id" in user: logger.error( - "Workspace Type -> %s not found. Skipping to next workspace template...", - type_name, + "User -> '%s' does not have an ID. The user creation may have failed before. Skipping...", + user_name, ) success = False continue - if workspace_type["templates"] == []: - logger.error( - "Workspace Type -> %s does not have templates. Skipping to next workspace template...", - type_name, + + if not "enable_o365" in user or not user["enable_o365"]: + logger.info( + "Microsoft 365 is not enabled in payload for User -> '%s'. Skipping...", + user_name, ) + continue + + # If the customizer pod is restarted it may be that + # the M365 user exists even if the M365 user ID is + # not yet written back into the payload. So we use the + # determine_user_id_m365() method that handles both cases + # and updates the payload if the user exists in M365 already. + user_m365_id = self.determine_user_id_m365(user) + if not user_m365_id: + logger.error("M365 user -> '%s' does not exist. Skipping...", user_name) success = False continue - workspace_template = next( - ( - item - for item in workspace_type["templates"] - if item["name"] == template_name - ), - None, - ) - if workspace_template: # does this template exist? + if self._m365.get_user_photo(user_m365_id, show_error=False): logger.info( - "Workspace Template -> %s has been specified in payload and it does exist.", - template_name, + "User -> '%s' (%s) has already a photo in Microsoft 365. Skipping...", + user_name, + user_m365_id, ) + continue else: - logger.error( - "Workspace Template -> %s has been specified in payload but it doesn't exist!", - template_name, + logger.info( + "User -> '%s' (%s) has not yet a photo in Microsoft 365. 
Uploading...", + user_name, + user_m365_id, ) - logger.error( - "Workspace Type -> %s has only these templates -> %s", - type_name, - workspace_type["templates"], + + response = self._otcs.get_node_from_nickname(user_name) + if response is None: + logger.warning( + "Missing photo for user -> '%s' - nickname not found. Skipping...", + user_name, + ) + continue + photo_id = self._otcs.get_result_value(response, "id") + photo_name = self._otcs.get_result_value(response, "name") + photo_path = "/tmp/" + str(photo_name) + response = self._otcs.download_document(photo_id, photo_path) + if response is None: + logger.warning( + "Failed to download photo for user -> '%s' from Extended ECM", + user_name, ) success = False continue + else: + logger.info( + "Successfully downloaded photo for user -> '%s' from Extended ECM to file -> '%s'", + user_name, + photo_path, + ) - template_id = workspace_template["id"] + # Upload photo to M365: + response = self._m365.update_user_photo(user_m365_id, photo_path) + if response is None: + logger.error( + "Failed to upload photo for user -> '%s' to Microsoft 365", + user_name, + ) + success = False + else: + logger.info( + "Successfully uploaded photo for user -> '%s' to Microsoft 365", + user_name, + ) + # end for loop - workspace_roles = self._otcs.get_workspace_roles(template_id) - if workspace_roles is None: + # Check if Admin has a photo as well (nickname needs to be "admin") + # Then we want this to be applied in M365 as well: + response = self._otcs.get_node_from_nickname("admin") + if response is None: + logger.warning("Missing photo for admin - nickname not found. Skipping...") + else: + photo_id = self._otcs.get_result_value(response, "id") + photo_name = self._otcs.get_result_value(response, "name") + photo_path = "/tmp/" + str(photo_name) + response = self._otcs.download_document(photo_id, photo_path) + if response is None: + logger.warning( + "Failed to download photo for admin user from Extended ECM", + ) + success = False + else: logger.info( - "Workspace Template %s with node Id -> %s has no roles. 
Skipping to next workspace...", - template_name, - template_id, + "Successfully downloaded photo for admin from Extended ECM to file -> '%s'", + photo_path, ) - continue + m365_admin_email = "admin@" + self._m365.config()["domain"] + response = self._m365.update_user_photo(m365_admin_email, photo_path) + if response is None: + logger.warning("Failed to add photo for %s", m365_admin_email) + else: + logger.info("Successfully added photo for %s", m365_admin_email) - for member in members: - # read user list and role name from payload: - member_users = ( - member["users"] if member.get("users") else [] - ) # be careful to avoid key errors as users are optional - member_groups = ( - member["groups"] if member.get("groups") else [] - ) # be careful to avoid key errors as groups are optional - member_role_name = member["role"] + self.write_status_file(success, section_name, self._users) - if member_role_name == "": # role name is required - logger.error( - "Members of workspace template -> %s is missing the role name.", - template_name, - ) - success = False - continue - if ( - member_users == [] and member_groups == [] - ): # we either need users or groups (or both) - logger.warning( - "Role -> %s of workspace template -> %s does not have any members (no users nor groups).", - member_role_name, - template_name, - ) - continue + return success - role_id = self._otcs.lookup_result_value( - workspace_roles, "name", member_role_name, "id" - ) - if role_id is None: - # if member_role is None: - logger.error( - "Workspace template -> %s does not have a role with name -> %s", - template_name, - member_role_name, - ) - success = False - continue - logger.info("Role -> %s has ID -> %s", member_role_name, role_id) + # end method definition - # Process users as workspace template members: - for member_user in member_users: - # find member user in current payload: - member_user_id = next( - (item for item in self._users if item["name"] == member_user), - {}, - ) - if member_user_id: - user_id = member_user_id["id"] - else: - # If this didn't work, try to get the member user from OTCS. This covers - # cases where the user is system generated or part - # of a former payload processing (thus not in the current payload): - logger.info( - "Member -> %s not found in current payload - check if it exists in OTCS already...", - member_user, - ) - response = self._otcs.get_user(member_user) - user_id = self._otcs.lookup_result_value( - response, key="name", value=member_user, return_key="id" - ) - if not user_id: - logger.error( - "Cannot find member user with login -> %s. Skipping...", - member_user, - ) - success = False - continue + def process_user_photos_salesforce( + self, section_name: str = "userPhotosSalesforce" + ) -> bool: + """Process user photos in payload and assign them to Salesforce users. - # Add member if it does not yet exists - suppress warning - # message if user is already in role: - response = self._otcs.add_member_to_workspace( - template_id, int(role_id), user_id, False - ) - if response is None: - logger.error( - "Failed to add user -> %s (%s) to role -> %s of workspace template -> %s", - member_user, - user_id, - member_role_name, - template_name, - ) - success = False - else: - logger.info( - "Successfully added user -> %s (%s) to role -> %s of workspace template -> %s", - member_user, - user_id, - member_role_name, - template_name, - ) + Args: + section_name (str, optional): name of the section. 
+ This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + """ - # Process groups as workspace template members: - for member_group in member_groups: - member_group_id = next( - (item for item in self._groups if item["name"] == member_group), - None, - ) - if member_group_id is None: - logger.error("Cannot find group with name -> %s", member_group) - success = False - continue - group_id = member_group_id["id"] + if not isinstance(self._salesforce, Salesforce): + logger.error( + "Salesforce connection not setup properly. Skipping payload section -> '%s'...", + section_name, + ) + return False + + if not self._users: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True + + success: bool = True + + # we assume the nickname of the photo item equals the login name of the user + # we also assume that the photos have been uploaded / transported into the target system + for user in self._users: + user_name = user["name"] + + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in user and not user["enabled"]: + logger.info( + "Payload for User -> '%s' is disabled. Skipping...", user_name + ) + continue + + # Check if the user is enabled for Salesforce: + if not "enable_salesforce" in user or not user["enable_salesforce"]: + logger.info( + "User -> '%s' is not enabled for Salesforce. Skipping...", + user_name, + ) + continue + + extra_attributes = user.get("extra_attributes", None) + if not extra_attributes or len(extra_attributes) == 0: + logger.info( + "User -> '%s' does not have the extra attributes for Salesforce. Skipping...", + user_name, + ) + continue + user_login = extra_attributes[0].get("value", "") + if not user_login: + logger.info( + "User -> '%s' does not have the extra attributes value for Salesforce. Skipping...", + user_name, + ) + continue + + user_id = self._salesforce.get_user_id(username=user_login) + if user_id is None: + logger.error( + "Failed to get Salesforce user ID of user -> %s", + user_login, + ) + success = False + continue + + response = self._otcs.get_node_from_nickname(user_name) + if response is None: + logger.warning( + "Missing photo for user -> '%s' - nickname not found. 
Skipping...", + user_name, + ) + continue + photo_id = self._otcs.get_result_value(response, "id") + photo_name = self._otcs.get_result_value(response, "name") + photo_path = "/tmp/" + str(photo_name) - response = self._otcs.add_member_to_workspace( - template_id, int(role_id), group_id + # Check if it is not yet downloaded: + if not os.path.isfile(photo_path): + # download the profile picture into the tmp directory: + response = self._otcs.download_document(photo_id, photo_path) + if response is None: + logger.warning( + "Failed to download photo for user -> '%s' from Extended ECM to file -> '%s'", + user_name, + photo_path, ) - if response is None: - logger.error( - "Failed to add group -> %s (%s) to role -> %s of workspace template -> %s", - member_group_id["name"], - group_id, - member_role_name, - template_name, - ) - success = False - else: - logger.info( - "Successfully added group -> %s (%s) to role -> %s of workspace template -> %s", - member_group_id["name"], - group_id, - member_role_name, - template_name, - ) + success = False + continue + else: + logger.info( + "Successfully downloaded photo for user -> '%s' from Extended ECM to file -> '%s'", + user_name, + photo_path, + ) + else: + logger.info( + "Reusing downloaded photo -> '%s' for Salesforce user -> '%s' (%s)", + photo_path, + user_name, + user_id, + ) + + response = self._salesforce.update_user_photo( + user_id=user_id, photo_path=photo_path + ) + if response: + logger.info( + "Successfully updated profile photo of Salesforce user -> '%s' (%s).", + user_login, + user_id, + ) + else: + logger.error( + "Failed to update profile photo of Salesforce user -> '%s' (%s). Skipping...", + user_login, + user_id, + ) + success = False + continue - self.write_status_file(success, section_name, self._workspace_types) + self.write_status_file(success, section_name, self._users) return success - # end method definition + # end method definition - def prepare_workspace_create_form( - self, - categories: list, - template_id: int, - ext_system_id: int, - bo_type: int, - bo_id: int, - parent_workspace_node_id: int, - ) -> dict | None: - """Prepare the category structure for the workspace creation. + def process_user_photos_core_share( + self, section_name: str = "userPhotosCoreShare" + ) -> bool: + """Process user photos in payload and assign them to Core Share users. Args: - categories (list): categories list from workspace payload - template_id (int): workspace template ID - ext_system_id (int): External system ID - bo_type (int): Business Object Type ID - bo_id (int): Business Object ID - parent_workspace_node_id (int): Parent Workspace ID - + section_name (str, optional): name of the section. + This name is also used for the "success" status + files written to the Admin Personal Workspace Returns: - dict | None: category structure for workspace creation or None - in case of an error. + bool: True if payload has been processed without errors, False otherwise """ - category_create_data = {"categories": {}} - - response = self._otcs.get_workspace_create_form( - template_id=template_id, - external_system_id=ext_system_id, - bo_type=bo_type, - bo_id=bo_id, - parent_id=parent_workspace_node_id, - ) - if response is None: + if not isinstance(self._core_share, CoreShare): logger.error( - "Failed to retrieve create information for template -> %s", - template_id, + "Core Share connection not setup properly. 
Skipping payload section -> '%s'...", + section_name, ) - return None + return False - logger.info( - "Successfully retrieved create information for template -> %s", - template_id, - ) + if not self._users: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True - # Process category information - forms = response["forms"] + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True - categories_form = {} + success: bool = True - # Typically the the create workspace form delivers 3 forms: - # 1. Form for System Attributes (has no role name) - # 2. Form for Category Data (role name = "categories") - # 3. Form for Classifications (role name = "classifications") - # First we extract these 3 forms: - for form in forms: - if "role_name" in form and form["role_name"] == "categories": - categories_form = form - logger.debug("Found Categories form -> %s", form) + # we assume the nickname of the photo item equals the login name of the user + # we also assume that the photos have been uploaded / transported into the target system + for user in self._users: + if not "lastname" in user or not "firstname" in user: + logger.error( + "User is missing last name or first name. Skipping to next user..." + ) + success = False continue - if "role_name" in form and form["role_name"] == "classifications": - logger.debug("Found Classification form -> %s", form) + user_login = user["name"] + user_last_name = user["lastname"] + user_first_name = user["firstname"] + user_name = user_first_name + " " + user_last_name + # user_email = user.get("email", "") + + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in user and not user["enabled"]: + logger.info( + "Payload for User -> '%s' is disabled. Skipping...", user_name + ) continue - # the remaining option is that this form is the system attributes form: - logger.debug("Found System Attributes form -> %s", form) - # We are just interested in the single category data set (role_name = "categories"): - data = categories_form["data"] - logger.debug("Categories data found -> %s", data) - schema = categories_form["schema"]["properties"] - logger.debug("Categories schema found -> %s", schema) - # parallel loop over category data and schema - for cat_data, cat_schema in zip(data, schema): - logger.info("Category ID -> %s", cat_data) - data_attributes = data[cat_data] - logger.debug("Data Attributes -> %s", data_attributes) - schema_attributes = schema[cat_schema]["properties"] - logger.debug("Schema Attributes -> %s", schema_attributes) - cat_name = schema[cat_schema]["title"] - logger.info("Category name -> %s", cat_name) - # parallel loop over attribute data and schema - # Sets with one (fixed) row have type = object - # Multi-value Sets with (multiple) rows have type = array and "properties" in "items" schema - # Multi-value attributes have also type = array but NO "properties" in "items" schema - for attr_data, attr_schema in zip(data_attributes, schema_attributes): - logger.debug("Attribute ID -> %s", attr_data) - logger.debug("Attribute Data -> %s", data_attributes[attr_data]) - logger.debug("Attribute Schema -> %s", schema_attributes[attr_schema]) - attr_type = schema_attributes[attr_schema]["type"] - logger.debug("Attribute Type -> %s", attr_type) - if not "title" in schema_attributes[attr_schema]: - logger.debug("Attribute has no title. 
Skipping...") + # Check if the user is enabled for Salesforce: + if not "enable_core_share" in user or not user["enable_core_share"]: + logger.info( + "User -> '%s' is not enabled for Core Share. Skipping...", + user_name, + ) + continue + + core_share_user_id = self.determine_user_id_core_share(user) + + if core_share_user_id is None: + logger.error( + "Failed to get ID of Core Share user -> %s", + user_name, + ) + success = False + continue + + response = self._otcs.get_node_from_nickname(user_login) + if response is None: + logger.warning( + "Missing photo for user -> '%s' - nickname not found. Skipping...", + user_login, + ) + continue + photo_id = self._otcs.get_result_value(response, "id") + photo_name = self._otcs.get_result_value(response, "name") + photo_path = "/tmp/" + str(photo_name) + + # Check if it is not yet downloaded: + if not os.path.isfile(photo_path): + # download the profile picture into the tmp directory: + response = self._otcs.download_document(photo_id, photo_path) + if response is None: + logger.warning( + "Failed to download photo for user -> '%s' from Extended ECM to file -> '%s'", + user_name, + photo_path, + ) + success = False continue - # Check if it is an multi-line set: + else: + logger.info( + "Successfully downloaded photo for user -> '%s' from Extended ECM to file -> '%s'", + user_name, + photo_path, + ) + else: + logger.info( + "Reusing downloaded photo -> '%s' for Core Share user -> '%s' (%s)", + photo_path, + user_name, + core_share_user_id, + ) + + response = self._core_share.update_user_photo( + user_id=core_share_user_id, photo_path=photo_path + ) + if response: + logger.info( + "Successfully updated profile photo of Core Share user -> '%s' (%s).", + user_name, + core_share_user_id, + ) + else: + logger.error( + "Failed to update profile photo of Core Share user -> '%s' (%s). Skipping...", + user_name, + core_share_user_id, + ) + success = False + continue + + self.write_status_file(success, section_name, self._users) + + return success + + # end method definition + + def process_business_object_types( + self, section_name: str = "businessObjectTypes" + ) -> list: + """Create a data structure for all business object types in the Extended ECM system. + + Args: + section_name (str, optional): name of the section. + This name is used for the "success" status + files written to the Admin Personal Workspace + Returns: + list: list of business object types. 
Each list element is a dict with these values: + - id (str) + - name (str) + - type (str) + - ext_system_id (str) + - business_properties (list) + - business_property_groups (list) + """ + + # If this payload section has been processed successfully before we + # still need to read the data structure from the status file and + # initialize self._business_object_types: + if self.check_status_file(section_name): + # read the list from the json file in admin Home + # this is important for restart of customizer pod + # as this data structure is used later on for workspace processing + logger.info( + "Re-Initialize business object types list from status file -> '%s' for later use...", + self.get_status_file_name(payload_section_name=section_name), + ) + self._business_object_types = self.get_status_file(section_name) + logger.info( + "Found -> %s business object types.", + str(len(self._business_object_types)), + ) + logger.debug( + "Business object types -> %s", str(self._business_object_types) + ) + return self._business_object_types + + success: bool = True + + # get all workspace types (these have been created by the transports and are not in the payload!) + # we need to do this each time as it needs to work across potential multiple payload files... + response = self._otcs.get_business_object_types() + if response is None: + logger.info("No business object types found!") + self._business_object_types = [] + else: + self._business_object_types = response["results"] + logger.info( + "Found -> %s business object types.", + str(len(self._business_object_types)), + ) + logger.debug( + "Business object types -> %s", str(self._business_object_types) + ) + + # now we enrich the workspace_type list elments (which are dicts) + # with additional dict elements for further processing: + for business_object_type in self._business_object_types: + # Get BO Type (e.g. KNA1): + bo_type = business_object_type["data"]["properties"]["bo_type"] + logger.debug("Business Object Type -> %s", bo_type) + business_object_type["type"] = bo_type + # Get BO Type ID: + bo_type_id = business_object_type["data"]["properties"]["bo_type_id"] + logger.debug("Business Object Type ID -> %s", bo_type_id) + business_object_type["id"] = bo_type_id + # Get BO Type Name: + bo_type_name = business_object_type["data"]["properties"]["bo_type_name"] + logger.debug("Business Object Type Name -> %s", bo_type_name) + business_object_type["name"] = bo_type_name + # Get External System ID: + ext_system_id = business_object_type["data"]["properties"]["ext_system_id"] + logger.debug("External System ID -> %s", ext_system_id) + business_object_type["ext_system_id"] = ext_system_id + + # Get additional information per BO Type (this REST API is severly + # limited) - it does not return Property names from External System + # and is also missing Business Property Groups: + # if not "/" in bo_type: + # response = self._otcs.get_business_object_type( + # external_system_id=ext_system_id, type_name=bo_type + # ) + # if response is None or not response["results"]: + # logger.warning( + # "Cannot retrieve additional information for business object type -> %s. 
Skipping...", + # bo_type, + # ) + # continue + # business_properties = response["results"]["data"][ + # "business_object_type" + # ]["data"]["businessProperties"] + # business_object_type["business_properties"] = business_properties + # else: + # logger.warning( + # "Business Object Type -> '%s' does not have a proper name to call REST API.", + # bo_type, + # ) + # business_object_type["business_properties"] = [] + + business_object_type["business_properties"] = [] + business_object_type["business_property_groups"] = [] + + # Now we complete the data with what we have extracted from the transport packages + # for Business Object Types. This is a workaround for the insufficient REST API + # implementation (see otcs.get_business_object_type) + if self._transport_extractions: + logger.info( + "Enrich Business Object Types with data extractions from transport packages (found '%s' extractions)...", + str(len(self._transport_extractions)), + ) + else: + logger.info( + "No transport extractions are recorded. This may be because of customizer restart." + ) + extraction_status_file = "transportPackagesExtractions" + if self.check_status_file(extraction_status_file): + logger.info( + "Try to load extractions from success file -> '%s'...", + extraction_status_file, + ) + self._transport_extractions = self.get_status_file( + extraction_status_file + ) + + for extraction in self._transport_extractions: + xpath = extraction.get("data") + data_list = extraction.get("data") + if not data_list: + logger.error( + "Extraction -> '%s' is missing the data element. Skipping...", + xpath, + ) + success = False + continue + if not isinstance(data_list, list): + logger.warning( + "Extracted data for -> '%s' is not a list. Cannot process it. Skipping...", + xpath, + ) + continue + + # The following loop processes a dictionasry of this structure: + + # llnode: { + # '@created': '2017-11-23T16:43:35', + # '@createdby': '1000', + # '@createdbyname': 'Terrarium Admin', + # '@description': '', + # '@id': '16013', + # '@modified': '2023-12-09T12:08:21', + # '@name': 'SFDC Order', + # '@objname': 'Business Object Type', + # '@objtype': '889', + # '@ownedby': '1000', + # '@ownedbyname': 'Terrarium Admin', + # '@parentguid': '95F96645-057D-4EAF-9083-BE9F24C0CB6C', + # '@parentid': '2898', + # '@parentname': 'Business Object Types', + # ... + # 'Nickname': {'@domain': ''}, + # 'name': {'@xml:lang': 'en', '#text': 'SFDC Order'}, + # 'description': {'@xml:lang': 'en'}, + # 'businessObjectTypeInfo': { + # 'basicInfo': { + # '@businessObjectId': '9', + # '@businessobjectType': 'Order', + # '@deleted': 'false', + # '@name': 'SFDC Order', + # '@subtype': '889', + # '@useBusWorkspace': 'true', + # 'displayUrl': {...} + # }, + # 'businessApplication': { + # 'businessObjectTypeReference': {...}}, + # 'businessAttachmentInfo': { + # '@automaticAddingOfBusinessObject': 'false', + # '@canbeAddedAsBusinessObject': 'false', + # '@enableBADIBeforeAddingBO': 'false', + # '@enableBADIBeforeRemovingBO': 'false', + # '@enableMetadataMapping': 'false' + # }, + # 'managedObjectTypes': { + # 'managedObjectType': [...] + # }, + # 'multilingualNames': {'language': [...]}, + # 'callbacks': {'callback': [...]}, + # 'workspaceTypeReference': {'@isDefaultDisplay': 'false', '@isDefaultSearch': 'false', 'businessObjectTypeReference': {...}}, + # 'businessPropertyMappings': { + # 'propertyMapping': [...] + # }, + # 'businessPropertyGroupMappings': { + # 'propertyGroupMapping': [...] + # }, + # 'documentTypes': { + # 'documentType': [...] 
+ # }, + # 'CustomBOTypeInfo': None + # } + # } + + for data in data_list: + # + # Level 1: llnode + # + llnode = data.get("llnode") + if not llnode: + logger.error("Missing llnode structure in data. Skipping...") + success = False + continue + + # + # Level 2: businessobjectTypeInfo + # + business_object_type_info = llnode.get( + "businessobjectTypeInfo", None + ) + if not business_object_type_info: + logger.error( + "Information is missing for Business Object Type -> '%s'. Skipping...", + bo_type_name, + ) + success = False + continue + + # Check if this extraction is for the current business object type: + basic_info = business_object_type_info.get("basicInfo", None) + if not basic_info: + logger.error( + "Cannot find Basic Info of Business Object Type -> '%s'. Skipping...", + bo_type_name, + ) + success = False + continue + name = basic_info.get("@businessobjectType", "") + if not name: + logger.error( + "Cannot find name of Business Object Type -> '%s'. Skipping...", + bo_type_name, + ) + success = False + continue + obj_type = llnode.get("@objtype", None) + # we need to compare bo_type and NOT bo_type_name here! + # Otherwise we don't find the SAP and SuccessFactors data: + if name != bo_type or obj_type != "889": + continue + + # + # Level 3: businessPropertyMappings - plain, non-grouped properties + # + business_property_mappings = business_object_type_info.get( + "businessPropertyMappings", None + ) + if not business_property_mappings: + logger.info( + "No Property Mapping for Business Object Type -> '%s'. Skipping...", + bo_type_name, + ) + else: + property_mappings = business_property_mappings.get( + "propertyMapping", [] + ) + # This can happen if there's only 1 propertyMapping; + if not isinstance(property_mappings, list): + logger.debug( + "Found a single property mapping in a dictionary (not in a list). Package it into a list...", + ) + property_mappings = [property_mappings] + + for property_mapping in property_mappings: + property_name = property_mapping.get("@propertyName") + attribute_name = property_mapping.get("@attributeName") + category_id = property_mapping.get("@categoryId") + mapping_type = property_mapping.get("@type") + logger.debug( + "%s Property Mapping for Business Object -> '%s' property -> '%s' is mapped to attribute -> '%s' (category -> %s)", + mapping_type, + bo_type_name, + property_name, + attribute_name, + category_id, + ) + business_object_type["business_properties"].append( + property_mapping + ) + + # + # Level 3: businessPropertyGroupMappings - grouped properties + # + business_property_group_mappings = business_object_type_info.get( + "businessPropertyGroupMappings", None + ) + if not business_property_group_mappings: + logger.info( + "No Property Group Mapping for Business Object Type -> '%s'. Skipping...", + bo_type_name, + ) + continue + + property_group_mappings = business_property_group_mappings.get( + "propertyGroupMapping", [] + ) + # This can happen if there's only 1 propertyMapping; + if isinstance(property_group_mappings, dict): + logger.debug( + "Found a single property group mapping in a dictionary (not in a list). 
Pack it into a list...",
+                    )
+                    property_group_mappings = [property_group_mappings]
+
+                for property_group_mapping in property_group_mappings:
+                    group_name = property_group_mapping.get("@groupName")
+                    set_name = property_group_mapping.get("@setName")
+                    category_id = property_group_mapping.get("@categoryId")
+                    mapping_type = property_group_mapping.get("@type")
+                    logger.debug(
+                        "%s Property Group Mapping for Business Object -> %s: group -> '%s' is mapped to set -> '%s' (category -> %s)",
+                        mapping_type,
+                        bo_type_name,
+                        group_name,
+                        set_name,
+                        category_id,
+                    )
+
+                    property_mappings = property_group_mapping.get(
+                        "propertyMapping", []
+                    )
+                    # This can happen if there's only 1 propertyMapping:
+                    if not isinstance(property_mappings, list):
+                        logger.debug(
+                            "Found a single property mapping in a dictionary (not in a list). Package it into a list...",
+                        )
+                        property_mappings = [property_mappings]
+
+                    for property_mapping in property_mappings:
+                        # for nested mappings we only have 2 fields - the rest is on the group level - see above
+                        property_name = property_mapping.get("@propertyName")
+                        attribute_name = property_mapping.get("@attributeName")
+                        logger.debug(
+                            "%s Property Mapping inside group for Business Object -> '%s', group -> '%s', property -> '%s' is mapped to set -> %s, attribute -> '%s' (category -> %s)",
+                            mapping_type,
+                            bo_type_name,
+                            group_name,
+                            property_name,
+                            set_name,
+                            attribute_name,
+                            category_id,
+                        )
+                        # we write the group / set information also in the property mapping
+                        # to have a plain list with all information:
+                        property_mapping["@groupName"] = group_name
+                        property_mapping["@setName"] = set_name
+                        property_mapping["@type"] = mapping_type
+                        business_object_type["business_property_groups"].append(
+                            property_mapping
+                        )
+
+        self.write_status_file(success, section_name, self._business_object_types)
+
+        return self._business_object_types
+
+    # end method definition
+
+    def get_business_object_properties(self, bo_type_name: str) -> dict | None:
+        """Get a dictionary with all property mappings of a business object type.
+        We construct this dictionary from the two lists of the given
+        business object type (property mappings and property group mappings).
+        These two lists have been created before by process_business_object_types().
+
+        This method is used for creation of business objects in Salesforce.
+
+        Args:
+            bo_type_name (str): Name of the business object type
+
+        Returns:
+            dict | None: dictionary with keys that are either the attribute name or
+                a key that is constructed like this: set name + "-" + attribute name.
+                This allows for an easy lookup in methods that have access to
+                the category data of business workspaces.
+        """
+
+        if not self._business_object_types:
+            logger.warning(
+                "List of business object types is empty / not initialized! 
Cannot lookup type with name -> '%s'", + bo_type_name, + ) + return None + + # Find the matching business object type: + business_object_type = next( + ( + item + for item in self._business_object_types + if item["name"] == bo_type_name + ), + None, + ) + if not business_object_type: + logger.warning( + "Cannot find business object type with name -> '%s'", bo_type_name + ) + return None + + business_properties = business_object_type.get("business_properties") + business_property_groups = business_object_type.get("business_property_groups") + + lookup_dict = {} + + for mapping in business_properties: + attribute_name = mapping.get("@attributeName") + lookup_dict[attribute_name] = mapping + + for mapping in business_property_groups: + set_name = mapping.get("@setName") + attribute_name = mapping.get("@attributeName") + lookup_dict[set_name + "-" + attribute_name] = mapping + + return lookup_dict + + # end method definition + + def process_workspace_types(self, section_name: str = "workspaceTypes") -> list: + """Create a data structure for all workspace types in the Extended ECM system. + + Args: + section_name (str, optional): name of the section. + This name is used for the "success" status + files written to the Admin Personal Workspace + Returns: + list: list of workspace types. Each list element is a dict with these values: + - id (str) + - name (str) + - templates (list) + + name (str) + + id + """ + + # If this payload section has been processed successfully before we + # still need to read the data structure from the status file and + # initialize self._workspace_types: + if self.check_status_file(section_name): + # read the list from the json file in admin Home + # this is important for restart of customizer pod + # as this data structure is used later on for workspace processing + logger.info( + "Re-Initialize workspace types list from status file -> '%s' for later use...", + self.get_status_file_name(payload_section_name=section_name), + ) + self._workspace_types = self.get_status_file(section_name) + logger.info("Found -> %s workspace types.", str(len(self._workspace_types))) + logger.debug("Workspace types -> %s", str(self._workspace_types)) + return self._workspace_types + + # Read payload_section "workspaceTypes" if available + payload_section = {} + for section in self._payload_sections: + if section["name"] == "workspaceTypes": + payload_section = section + + # get all workspace types (these have been created by the transports and are not in the payload!) + # we need to do this each time as it needs to work across potential multiple payload files... 
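+        # For orientation, a sketch (with hypothetical values, not actual REST
+        # output) of one enriched element in self._workspace_types after this
+        # method has completed:
+        #
+        #   {
+        #       "data": {"properties": {...}},  # raw item returned by OTCS
+        #       "id": "25",                     # copied from wksp_type_id
+        #       "name": "Sales Case",           # copied from wksp_type_name
+        #       "templates": [
+        #           {"name": "Sales Case Template", "id": 123456},
+        #       ],
+        #   }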
+        response = self._otcs.get_workspace_types()
+        if response is None:
+            logger.error("No workspace types found!")
+            self._workspace_types = []
+        else:
+            self._workspace_types = response["results"]
+            logger.info("Found -> %s workspace types.", str(len(self._workspace_types)))
+            logger.debug("Workspace types -> %s", str(self._workspace_types))
+
+        # now we enrich the workspace type list elements (which are dicts)
+        # with additional dict elements for further processing:
+        for workspace_type in self._workspace_types:
+            workspace_type_id = workspace_type["data"]["properties"]["wksp_type_id"]
+            logger.debug("Workspace Type ID -> %s", workspace_type_id)
+            workspace_type["id"] = workspace_type_id
+            workspace_type_name = workspace_type["data"]["properties"]["wksp_type_name"]
+            logger.info("Workspace Type Name -> '%s'", workspace_type_name)
+            workspace_type["name"] = workspace_type_name
+            workspace_templates = workspace_type["data"]["properties"]["templates"]
+            # Create empty lists of dicts with template names and node IDs:
+            workspace_type["templates"] = []
+            if workspace_templates:
+                # Determine available templates per workspace type (there can be multiple!)
+                for workspace_template in workspace_templates:
+                    workspace_template_id = workspace_template["id"]
+                    workspace_template_name = workspace_template["name"]
+                    logger.info(
+                        "Found workspace template with name -> '%s' and ID -> %s.",
+                        workspace_template_name,
+                        workspace_template_id,
+                    )
+                    template = {
+                        "name": workspace_template_name,
+                        "id": workspace_template_id,
+                    }
+                    workspace_type["templates"].append(template)
+
+                    if payload_section.get("inherit_permissions", True):
+                        # Workaround for problem with workspace role inheritance
+                        # which may be related to Transport or REST API: to work-around this we
+                        # push down the workspace roles to the workspace folders explicitly:
+                        response = self._otcs.get_workspace_roles(workspace_template_id)
+                        for roles in response["results"]:
+                            role_name = roles["data"]["properties"]["name"]
+                            role_id = roles["data"]["properties"]["id"]
+                            permissions = roles["data"]["properties"]["perms"]
+                            # as get_workspace_roles() delivers permissions as a value (bit encoded)
+                            # we need to convert it to a permissions string list:
+                            permission_string_list = self._otcs.convert_permission_value_to_permission_string(
+                                permissions
+                            )
+
+                            logger.info(
+                                "Inherit permissions of workspace template -> '%s' and role -> '%s' to workspace folders...",
+                                workspace_template_name,
+                                role_name,
+                            )
+
+                            # Inherit permissions to folders of workspace template:
+                            response = self._otcs.assign_workspace_permissions(
+                                workspace_template_id,
+                                role_id,
+                                permission_string_list,
+                                1,  # Only sub items - workspace node itself is OK
+                            )
+
+            else:
+                logger.warning(
+                    "Workspace Type -> '%s' has no templates!",
+                    workspace_type_name,
+                )
+                continue
+
+        self.write_status_file(True, section_name, self._workspace_types)
+
+        return self._workspace_types
+
+    # end method definition
+
+    def process_workspace_templates(
+        self, section_name: str = "workspaceTemplates"
+    ) -> bool:
+        """Process Workspace Template payload. This allows defining role members on
+        a per-template basis. This avoids having to "pollute" workspace templates
+        with user or group information and instead controls this via payload.
+
+        Args:
+            section_name (str, optional): name of the section. 
+ This name is used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + """ + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True + + success: bool = True + + for workspace_template in self._workspace_templates: + # Read Workspace Type Name from payload: + if not "type_name" in workspace_template: + logger.error( + "Workspace template needs a type name! Skipping to next workspace template...", + ) + success = False + continue + type_name = workspace_template["type_name"] + + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in workspace_template and not workspace_template["enabled"]: + logger.info( + "Payload for Workspace Template -> '%s' is disabled. Skipping to next workspace template...", + type_name, + ) + continue + + # Read Workspace Template Name from payload: + if not "template_name" in workspace_template: + logger.error( + "Workspace Template for Workspace Type -> '%s' needs a template name! Skipping to next workspace template...", + type_name, + ) + success = False + continue + template_name = workspace_template["template_name"] + + # Read members from payload: + if not "members" in workspace_template: + logger.info( + "Workspace template with type -> '%s' and name -> '%s' has no members in payload. Skipping to next workspace...", + type_name, + template_name, + ) + continue + members = workspace_template["members"] + + # Find the workspace type with the name given in the _workspace_types + # datastructure that has been generated by process_workspace_types() method before: + workspace_type = next( + (item for item in self._workspace_types if item["name"] == type_name), + None, + ) + if workspace_type is None: + logger.error( + "Workspace Type -> '%s' not found. Skipping to next workspace template...", + type_name, + ) + success = False + continue + if workspace_type["templates"] == []: + logger.error( + "Workspace Type -> '%s' does not have templates. Skipping to next workspace template...", + type_name, + ) + success = False + continue + + workspace_template = next( + ( + item + for item in workspace_type["templates"] + if item["name"] == template_name + ), + None, + ) + if workspace_template: # does this template exist? + logger.info( + "Workspace Template -> '%s' has been specified in payload and it does exist.", + template_name, + ) + else: + logger.error( + "Workspace Template -> '%s' has been specified in payload but it doesn't exist!", + template_name, + ) + logger.error( + "Workspace Type -> '%s' has only these templates -> %s", + type_name, + workspace_type["templates"], + ) + success = False + continue + + template_id = workspace_template["id"] + + workspace_roles = self._otcs.get_workspace_roles(template_id) + if workspace_roles is None: + logger.info( + "Workspace Template '%s' with node Id -> %s has no roles. 
Skipping to next workspace...", + template_name, + template_id, + ) + continue + + for member in members: + # read user list and role name from payload: + member_users = ( + member["users"] if member.get("users") else [] + ) # be careful to avoid key errors as users are optional + member_groups = ( + member["groups"] if member.get("groups") else [] + ) # be careful to avoid key errors as groups are optional + member_role_name = member["role"] + + if member_role_name == "": # role name is required + logger.error( + "Members of workspace template -> '%s' is missing the role name.", + template_name, + ) + success = False + continue + if ( + member_users == [] and member_groups == [] + ): # we either need users or groups (or both) in the payload + logger.debug( + "Payload for workspace template -> '%s' and role -> '%s' does not have any members (no users nor groups).", + template_name, + member_role_name, + ) + continue + + role_id = self._otcs.lookup_result_value( + workspace_roles, "name", member_role_name, "id" + ) + if role_id is None: + # if member_role is None: + logger.error( + "Workspace template -> '%s' does not have a role with name -> '%s'", + template_name, + member_role_name, + ) + success = False + continue + logger.info("Role -> '%s' has ID -> %s", member_role_name, role_id) + + # Process users as workspace template members: + for member_user in member_users: + # find member user in current payload: + member_user_id = next( + (item for item in self._users if item["name"] == member_user), + {}, + ) + if member_user_id: + user_id = member_user_id["id"] + else: + # If this didn't work, try to get the member user from OTCS. This covers + # cases where the user is system generated or part + # of a former payload processing (thus not in the current payload): + logger.info( + "Member -> '%s' not found in current payload - check if it exists in OTCS already...", + member_user, + ) + response = self._otcs.get_user(name=member_user) + user_id = self._otcs.lookup_result_value( + response, key="name", value=member_user, return_key="id" + ) + if not user_id: + logger.error( + "Cannot find member user with login -> '%s'. 
Skipping...", + member_user, + ) + success = False + continue + + # Add member if it does not yet exists - suppress warning + # message if user is already in role: + response = self._otcs.add_workspace_member( + workspace_id=template_id, + role_id=int(role_id), + member_id=user_id, + show_warning=False, + ) + if response is None: + logger.error( + "Failed to add user -> '%s' (%s) as member to role -> '%s' of workspace template -> '%s'", + member_user, + user_id, + member_role_name, + template_name, + ) + success = False + else: + logger.info( + "Successfully added user -> '%s' (%s) as member to role -> '%s' of workspace template -> '%s'", + member_user, + user_id, + member_role_name, + template_name, + ) + + # Process groups as workspace template members: + for member_group in member_groups: + member_group_id = next( + (item for item in self._groups if item["name"] == member_group), + None, + ) + if member_group_id is None: + logger.error( + "Cannot find group with name -> '%s'", member_group + ) + success = False + continue + group_id = member_group_id["id"] + + response = self._otcs.add_workspace_member( + workspace_id=template_id, + role_id=int(role_id), + member_id=group_id, + show_warning=False, + ) + if response is None: + logger.error( + "Failed to add group -> '%s' (%s) as member to role -> '%s' of workspace template -> '%s'", + member_group_id["name"], + group_id, + member_role_name, + template_name, + ) + success = False + else: + logger.info( + "Successfully added group -> '%s' (%s) as member to role -> '%s' of workspace template -> '%s'", + member_group_id["name"], + group_id, + member_role_name, + template_name, + ) + + self.write_status_file(success, section_name, self._workspace_templates) + + return success + + # end method definition + + def prepare_workspace_create_form( + self, + categories: list, + template_id: int, + ext_system_id: str | None = None, + bo_type: str | None = None, + bo_id: str | None = None, + parent_workspace_node_id: int | None = None, + ) -> dict | None: + """Prepare the category structure for the workspace creation. + + Args: + categories (list): categories list from workspace payload + template_id (int): workspace template ID + ext_system_id (str, optional): External system ID + bo_type (str, optional): Business Object Type ID + bo_id (str, optional): Business Object ID + parent_workspace_node_id (int, optional): Parent Workspace ID + + Returns: + dict | None: category structure for workspace creation or None + in case of an error. + """ + + category_create_data = {"categories": {}} + + response = self._otcs.get_workspace_create_form( + template_id=template_id, + external_system_id=ext_system_id, + bo_type=bo_type, + bo_id=bo_id, + parent_id=parent_workspace_node_id, + ) + if response is None: + logger.error( + "Failed to retrieve create information for template -> %s", + template_id, + ) + return None + + logger.debug( + "Successfully retrieved create information for template -> %s", + template_id, + ) + + # Process category information + forms = response["forms"] + + categories_form = {} + + # Typically the the create workspace form delivers 3 forms: + # 1. Form for System Attributes (has no role name) + # 2. Form for Category Data (role name = "categories") + # 3. 
Form for Classifications (role name = "classifications") + # First we extract these 3 forms: + for form in forms: + if "role_name" in form and form["role_name"] == "categories": + categories_form = form + logger.debug("Found Categories form -> %s", form) + continue + if "role_name" in form and form["role_name"] == "classifications": + logger.debug("Found Classification form -> %s", form) + continue + # the remaining option is that this form is the system attributes form: + logger.debug("Found System Attributes form -> %s", form) + + # We are just interested in the single category data set (role_name = "categories"): + data = categories_form["data"] + logger.debug("Categories data found -> %s", data) + schema = categories_form["schema"]["properties"] + logger.debug("Categories schema found -> %s", schema) + # parallel loop over category data and schema + for cat_data, cat_schema in zip(data, schema): + logger.debug("Category ID -> %s", cat_data) + data_attributes = data[cat_data] + logger.debug("Data Attributes -> %s", data_attributes) + schema_attributes = schema[cat_schema]["properties"] + logger.debug("Schema Attributes -> %s", schema_attributes) + cat_name = schema[cat_schema]["title"] + logger.debug("Category name -> %s", cat_name) + # parallel loop over attribute data and schema + # Sets with one (fixed) row have type = object + # Multi-value Sets with (multiple) rows have type = array and "properties" in "items" schema + # Multi-value attributes have also type = array but NO "properties" in "items" schema + for attr_data, attr_schema in zip(data_attributes, schema_attributes): + logger.debug("Attribute ID -> %s", attr_data) + logger.debug("Attribute Data -> %s", data_attributes[attr_data]) + logger.debug("Attribute Schema -> %s", schema_attributes[attr_schema]) + attr_type = schema_attributes[attr_schema]["type"] + logger.debug("Attribute Type -> %s", attr_type) + if not "title" in schema_attributes[attr_schema]: + logger.debug("Attribute has no title. Skipping...") + continue + # Check if it is an multi-line set: if attr_type == "array" and ( "properties" in schema_attributes[attr_schema]["items"] ): set_name = schema_attributes[attr_schema]["title"] - logger.info("Multi-line Set -> %s", set_name) + logger.debug("Multi-line Set -> %s", set_name) set_data_attributes = data_attributes[ attr_data ] # this is a list [] @@ -5370,528 +7854,2820 @@ def prepare_workspace_create_form( ), None, ) - # stop if there's no payload for the row: - if attribute is None: - logger.info( - "No payload found for set -> %s, row -> %s", - set_name, - row, - ) - # we assume that if there's no payload for row n there will be no payload for rows > n - # and break the while loop: - break - # do we need to create a new row in the data set? 
- elif row > set_data_max_rows: - # use the row we stored above to create a new empty row: - logger.info( - "Found payload for row -> %s, we need a new data row for it", - row, - ) - logger.info( - "Adding an additional row -> %s to set data -> %s", - row, - set_name, - ) - # add the empty dict to the list: - set_data_attributes.append(dict(first_row)) - set_data_max_rows += 1 - else: - logger.info( - "Found payload for row -> %s %s we can store in existing data row", - row, - set_name, - ) - # traverse all attributes in a single row: - for set_attr_schema in set_schema_attributes: - logger.debug( - "Set Attribute ID -> %s (row -> %s)", - set_attr_schema, - row, - ) - logger.debug( - "Set Attribute Schema -> %s (row -> %s)", - set_schema_attributes[set_attr_schema], - row, - ) - set_attr_type = set_schema_attributes[set_attr_schema][ - "type" - ] - logger.debug( - "Set Attribute Type -> %s (row -> %s)", - set_attr_type, - row, - ) - set_attr_name = set_schema_attributes[set_attr_schema][ - "title" - ] - logger.debug( - "Set Attribute Name -> %s (row -> %s)", - set_attr_name, - row, - ) - # Lookup the attribute with the right category, set, attribute name, and row number in payload: - attribute = next( - ( - item - for item in categories - if ( - item["name"] == cat_name - and "set" - in item # not all items may have a "set" key - and item["set"] == set_name - and item["attribute"] == set_attr_name - and "row" - in item # not all items may have a "row" key - and item["row"] == row - ) - ), - None, - ) - if attribute is None: - logger.warning( - "Set -> %s, Attribute -> %s, Row -> %s not found in payload.", - set_name, - set_attr_name, - row, - ) + # stop if there's no payload for the row: + if attribute is None: + logger.debug( + "No payload found for set -> %s, row -> %s", + set_name, + row, + ) + # we assume that if there's no payload for row n there will be no payload for rows > n + # and break the while loop: + break + # do we need to create a new row in the data set? 
+ elif row > set_data_max_rows: + # use the row we stored above to create a new empty row: + logger.debug( + "Found payload for row -> %s, we need a new data row for it", + row, + ) + logger.debug( + "Adding an additional row -> %s to set data -> '%s'", + row, + set_name, + ) + # add the empty dict to the list: + set_data_attributes.append(dict(first_row)) + set_data_max_rows += 1 + else: + logger.debug( + "Found payload for row -> %s %s we can store in existing data row", + row, + set_name, + ) + # traverse all attributes in a single row: + for set_attr_schema in set_schema_attributes: + logger.debug( + "Set Attribute ID -> %s (row -> %s)", + set_attr_schema, + row, + ) + logger.debug( + "Set Attribute Schema -> %s (row -> %s)", + set_schema_attributes[set_attr_schema], + row, + ) + set_attr_type = set_schema_attributes[set_attr_schema][ + "type" + ] + logger.debug( + "Set Attribute Type -> %s (row -> %s)", + set_attr_type, + row, + ) + set_attr_name = set_schema_attributes[set_attr_schema][ + "title" + ] + logger.debug( + "Set Attribute Name -> %s (row -> %s)", + set_attr_name, + row, + ) + # Lookup the attribute with the right category, set, attribute name, and row number in payload: + attribute = next( + ( + item + for item in categories + if ( + item["name"] == cat_name + and "set" + in item # not all items may have a "set" key + and item["set"] == set_name + and item["attribute"] == set_attr_name + and "row" + in item # not all items may have a "row" key + and item["row"] == row + ) + ), + None, + ) + if attribute is None: + logger.warning( + "Set -> '%s', Attribute -> '%s', Row -> %s not found in payload.", + set_name, + set_attr_name, + row, + ) + + # need to use row - 1 as index starts with 0 but payload rows start with 1 + set_data_attributes[row - 1][set_attr_schema] = "" + else: + logger.debug( + "Set -> '%s', Attribute -> '%s', Row -> %s found in payload, value -> '%s'", + set_name, + set_attr_name, + row, + attribute["value"], + ) + # Put the value from the payload into data structure + # need to use row - 1 as index starts with 0 but payload rows start with 1 + set_data_attributes[row - 1][set_attr_schema] = ( + attribute["value"] + ) + row += 1 # continue the while loop with the next row + # Check if it is single-line set: + elif attr_type == "object": + set_name = schema_attributes[attr_schema]["title"] + logger.debug("Single-line Set -> %s", set_name) + set_data_attributes = data_attributes[attr_data] + logger.debug("Set Data Attributes -> %s", set_data_attributes) + + set_schema_attributes = schema_attributes[attr_schema]["properties"] + logger.debug("Set Schema Attributes -> %s", set_schema_attributes) + for set_attr_data, set_attr_schema in zip( + set_data_attributes, set_schema_attributes + ): + logger.debug("Set Attribute ID -> %s", set_attr_data) + logger.debug( + "Set Attribute Data -> %s", + set_data_attributes[set_attr_data], + ) + logger.debug( + "Set Attribute Schema -> %s", + set_schema_attributes[set_attr_schema], + ) + set_attr_type = set_schema_attributes[set_attr_schema]["type"] + logger.debug("Set Attribute Type -> %s", set_attr_type) + set_attr_name = set_schema_attributes[set_attr_schema]["title"] + logger.debug("Set Attribute Name -> %s", set_attr_name) + # Lookup the attribute with the right category, set and attribute name in payload: + attribute = next( + ( + item + for item in categories + if ( + item["name"] == cat_name + and "set" + in item # not all items may have a "set" key + and item["set"] == set_name + and item["attribute"] == set_attr_name 
+ ) + ), + None, + ) + if attribute is None: + logger.warning( + "Category -> '%s', Set -> %s, Attribute -> '%s' not found in payload.", + cat_name, + set_name, + set_attr_name, + ) + set_data_attributes[set_attr_data] = "" + else: + logger.debug( + "Category -> '%s', Set -> %s, Attribute -> '%s' found in payload, value -> %s", + cat_name, + set_name, + set_attr_name, + attribute["value"], + ) + # Put the value from the payload into data structure + set_data_attributes[set_attr_data] = attribute["value"] + # It is a plain attribute (not inside a set) or it is a multi-value attribute (not inside a set): + else: + attr_name = schema_attributes[attr_schema]["title"] + logger.debug("Attribute Name -> %s", attr_name) + # Lookup the attribute with the right category and attribute name in payload: + attribute = next( + ( + item + for item in categories + if ( + item["name"] == cat_name + and item["attribute"] == attr_name + ) + ), + None, + ) + if attribute is None: + logger.warning( + "Category -> '%s', Attribute -> '%s' not found in payload.", + cat_name, + attr_name, + ) + data_attributes[attr_data] = "" + else: + logger.debug( + "Category -> '%s', Attribute -> '%s' found in payload, value -> %s", + cat_name, + attr_name, + attribute["value"], + ) + # We need to handle a very special case here for Extended ECM for Government + # which has an attribute type "Organizational Unit" (OU). This is referring to a group ID + # which is not stable across deployments. So we need to lookup the Group ID and add it + # to the data structure. This expects that the payload has the Group Name and not the Group ID + if attr_type == str(11480): + logger.debug( + "Attribute -> '%s' is is of type -> Organizational Unit (%s). Looking up group ID for group name -> %s", + attr_name, + attr_type, + attribute["value"], + ) + group = self._otcs.get_group(attribute["value"]) + group_id = self._otcs.lookup_result_value( + group, "name", attribute["value"], "id" + ) + + if group_id: + logger.debug( + "Group for Organizational Unit -> '%s' has ID -> %s", + attribute["value"], + group_id, + ) + # Put the group ID into data structure + data_attributes[attr_data] = str(group_id) + else: + logger.error( + "Group for Organizational Unit -> '%s' does not exist!", + attribute["value"], + ) + # Clear the value to avoid workspace create failure + data_attributes[attr_data] = "" + # handle special case where attribute type is a user picker. + # we expect that the payload includes the login name for this + # (as user IDs are not stable across systems) but then we need + # to lookup the real user ID here: + elif attr_type == "otcs_user_picker": + logger.debug( + "Attribute -> '%s' is is of type -> User Picker (%s). 
Looking up user ID for user login name -> %s", + attr_name, + attr_type, + attribute["value"], + ) + user = self._otcs.get_user(name=attribute["value"]) + user_id = self._otcs.lookup_result_value( + response=user, + key="name", + value=attribute["value"], + return_key="id", + ) + if user_id: + # User has been found - determine ID: + logger.debug( + "User -> '%s' has ID -> %s", + attribute["value"], + user_id, + ) + # Put the user ID into data structure + data_attributes[attr_data] = str(user_id) + else: + logger.error( + "User with login name -> '%s' does not exist!", + attribute["value"], + ) + # Clear the value to avoid workspace create failure + data_attributes[attr_data] = "" + else: + # Put the value from the payload into data structure + data_attributes[attr_data] = attribute["value"] + category_create_data["categories"][cat_data] = data_attributes + + logger.debug("Category Create Data -> %s", category_create_data) + + return category_create_data + + # end method definition + + def get_salesforce_business_object( + self, + workspace: dict, + object_type: str, + search_field: str, + search_value: str, + ) -> str | None: + """Get the Salesforce ID (str) of an Salesforce object by querying the Salesforce API + + Args: + workspace (dict): Workspace payload + object_type (str): Business Object Type + search_field (str): Search field to find business object in external system. + search_value (str): Search value to find business object in external system. + + Returns: + str | None: technical ID of the business object + """ + + if not self._salesforce: + logger.error( + "Salesforce connection not initialized! Cannot connect to Salesforce API!", + ) + return None + + logger.debug( + "Workspaces is connected to Salesforce and we need to lookup the BO ID...", + ) + salesforce_token = self._salesforce.authenticate() + if not salesforce_token: + logger.error("Failed to authenticate with Salesforce!") + return None + + response = self._salesforce.get_object( + object_type=object_type, + search_field=search_field, + search_value=search_value, + result_fields=["Id"], + ) + num_of_bos = ( + int(response.get("totalSize", 0)) + if (response is not None and "totalSize" in response) + else 0 + ) + if num_of_bos > 1: + logger.warning( + "Salesforce lookup delivered %s values! We will pick the first one.", + str(num_of_bos), + ) + bo_id = self._salesforce.get_result_value(response, "Id") + if not bo_id: + logger.warning( + "Business object of type -> '%s' and %s = %s does not exist in Salesforce!", + object_type, + search_field, + search_value, + ) + logger.info( + "We try to create the Salesforce object of type -> '%s'...", + object_type, + ) + + # Get a helper dict to quickly lookup Salesforce properties + # for given set + attribute name: + property_lookup = self.get_business_object_properties( + bo_type_name=object_type + ) + # In case we couldn't find properties for the given Business Object Type + # we bail out... 
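+            # For illustration (hypothetical names, not actual payload data),
+            # the helper dict maps plain attribute names and
+            # "<set name>-<attribute name>" keys to the raw property mappings,
+            # e.g.:
+            #
+            #   {
+            #       "Account Name": {"@propertyName": "Name", ...},
+            #       "Address-City": {"@propertyName": "BillingCity",
+            #                        "@setName": "Address", ...},
+            #   }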
+ if not property_lookup: + logger.warning( + "Cannot create Salesforce object - no business object properties found!", + ) + return None + + categories = workspace.get("categories", []) + parameter_dict = {} + # We process all category entries in workspace payload + # and see if we have a matching mapping to a business property + # in the BO Type definition: + for category in categories: + # generate the lookup key: + key = "" + if "set" in category: + key += category["set"] + "-" + key += category.get("attribute") + # get the attribute value: + value = category.get("value") + # lookup the mapping + mapping = property_lookup.get(key, None) + # Check if we have a mapping: + if mapping: + property_name = mapping.get("@propertyName", None) + logger.debug( + "Found business property -> '%s' for attribute -> '%s'", + property_name, + category.get("attribute"), + ) + parameter_dict[property_name] = value + else: + logger.debug( + "Attribute -> '%s' (key -> %s) does not have a mapped business property.", + category.get("attribute"), + key, + ) + + if not parameter_dict: + logger.warning( + "Cannot create Salesforce object of type -> '%s' - no parameters found!", + object_type, + ) + return None + + logger.info( + "Create Salesforce object of type -> '%s' with parameters -> %s", + object_type, + str(parameter_dict), + ) + # + # Now we try to create the Salesforce object + # + response = self._salesforce.add_object( + object_type=object_type, **parameter_dict + ) + bo_id = self._salesforce.get_result_value(response, "id") + if bo_id: + logger.info( + "Created Salesforce business object with ID -> %s of type -> '%s'", + bo_id, + object_type, + ) + else: + logger.error( + "Failed to create Salesforce business object of type -> '%s'", + object_type, + ) + else: # BO found + logger.debug( + "Retrieved ID -> %s for Salesforce object type -> '%s' (looking up -> '%s' in field -> '%s')", + bo_id, + object_type, + search_value, + search_field, + ) + + return bo_id + + # end method definition + + def prepare_workspace_business_objects( + self, + workspace: dict, + business_objects: list, + ) -> list | None: + """Prepare the business object data for the workspace creation. + This supports multiple external system connections. This methods + also checks if the external system is reachable and tries to create + missing business objects in the leading system if they are missing. + + Args: + workspace (dict): Payload data for the Workspace + business_objects (list): Payload data for the business object connections. + + Returns: + list | None: list of business object data connections (dicts) + """ + + business_object_list = [] + + for business_object_data in business_objects: + business_object = {} + + name = workspace.get("name") + + # Read business object data from workspace payload. 
+ # business_object_data is a dict with 3-5 elements: + if "external_system" in business_object_data: + ext_system_id = business_object_data["external_system"] + else: + logger.error( + "Missing External System in Business Object payload for workspace -> '%s'.", + name, + ) + continue + if "bo_type" in business_object_data: + bo_type = business_object_data["bo_type"] + else: + logger.error( + "Missing Type in Business Object payload for workspace -> '%s'.", + name, + ) + continue + + if "bo_id" in business_object_data: + bo_id = business_object_data["bo_id"] + bo_search_field = None + bo_search_value = None + elif ( + not "bo_search_field" in business_object_data + or not "bo_search_value" in business_object_data + ): + logger.error( + "Missing BO search fields (bo_search_field, bo_search_value) in Business Object payload for workspace -> '%s'.", + name, + ) + continue + else: + bo_search_field = business_object_data["bo_search_field"] + bo_search_value = business_object_data["bo_search_value"] + bo_id = None + + # Check if external system has been declared in payload: + external_system = next( + ( + item + for item in self._external_systems + if (item["external_system_name"] == ext_system_id) + ), + None, + ) + + if not external_system: + logger.warning( + "External System -> '%s' does not exist. Cannot connect workspace -> '%s' to -> %s. Create workspace without connection.", + ext_system_id, + name, + ext_system_id, + ) + continue + elif not external_system.get("reachable"): + logger.warning( + "External System -> '%s' is not reachable. Cannot connect workspace -> '%s' to -> (%s, %s, %s, %s, %s). Create workspace without connection...", + ext_system_id, + name, + ext_system_id, + bo_type, + bo_id, + bo_search_field, + bo_search_value, + ) + continue + external_system_type = external_system.get("external_system_type", "") + + # For Salesforce we need to determine the actual business object ID (technical ID): + if external_system_type == "Salesforce" and not bo_id: + bo_id = self.get_salesforce_business_object( + workspace, + object_type=bo_type, + search_field=bo_search_field, + search_value=bo_search_value, + ) + if not bo_id: + logger.warning( + "Workspace -> '%s' will not be connected to Salesforce as the Business Object ID couldn't be determined (type -> '%s', search_field -> '%s', search_value -> '%s')", + name, + bo_type, + bo_search_field, + bo_search_value, + ) + continue + # end if salesforce + + logger.info( + "Workspace -> '%s' will be connected with external system -> '%s' (%s) with (type -> '%s', ID -> %s)", + name, + external_system_type, + ext_system_id, + bo_type, + bo_id, + ) + + business_object["ext_system_id"] = ext_system_id + business_object["bo_type"] = bo_type + business_object["bo_id"] = bo_id + + business_object_list.append(business_object) + + return business_object_list + + # end method definition + + def process_workspace( + self, + workspace: dict, + ) -> bool: + """Worker thread for workspace creation. + + Args: + workspace (dict): Dictionary with payload of a single workspace. + + Returns: + bool: True = Success, False = Failure + """ + + # Read name from payload: + if not "name" in workspace: + logger.error("Workspace needs a name! Skipping to next workspace...") + return False + name = workspace["name"] + + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in workspace and not workspace["enabled"]: + logger.info("Payload for Workspace -> '%s' is disabled. 
Skipping...", name) + return True + + # Read Type Name from payload: + if not "type_name" in workspace: + logger.error( + "Workspace -> '%s' needs a type name! Skipping to next workspace...", + name, + ) + return False + type_name = workspace["type_name"] + + # We need to do this early to find out if we have a cross-application workspace + # and need to continue even if the workspace does exist... + if "business_objects" in workspace and workspace["business_objects"]: + business_objects = workspace["business_objects"] + + business_object_list = self.prepare_workspace_business_objects( + workspace=workspace, business_objects=business_objects + ) + # Check if any of the external systems are avaiable: + if business_object_list: + logger.info( + "Workspace -> '%s' will be connected to -> %s business object(s).", + name, + str(len(business_object_list)), + ) + else: + logger.debug( + "Workspace -> '%s' is not connected to any business object.", + name, + ) + business_object_list = [] + + # Intialize cross-application workspace to "off": + ibo_workspace_id = None + + # check if the workspace has been created before (effort to make the customizing code idem-potent) + logger.debug( + "Check if workspace -> '%s' of type -> '%s' does already exist...", + name, + type_name, + ) + # Check if workspace does already exist + # In case the workspace exists, determine_workspace_id() + # also stores the node ID into workspace["nodeId"] + workspace_id = self.determine_workspace_id(workspace) + if workspace_id: + logger.info( + "Workspace -> '%s' of type -> '%s' does already exist and has ID -> %s!", + name, + type_name, + workspace_id, + ) + # Check if we have an existing workspace that is cross-application. + # In this case we cannot just skip (return). + if len(business_object_list) > 1: + ibo_workspace_id = workspace_id + logger.info( + "Workspace -> '%s' is a cross-application workspace so we cannot skip the creation...", + name, + ) + # We assume the workspace is already conntected to the first BO in the list + # This is a simplifiying assumption and should be enhacned in the future. + business_object_list.pop(0) + else: + logger.info( + "Workspace -> '%s' is NOT a cross-application workspace so we can skip the creation...", + name, + ) + return True + + # Read optional description from payload: + if not "description" in workspace: + description = "" + else: + description = workspace["description"] + + # Parent ID is optional and only required if workspace type does not specify a create location. + # This is typically the case if it is a nested workspace or workspaces of the same type can be created + # in different locations in the Enterprise Workspace: + parent_id = workspace.get("parent_id", None) + + if parent_id is not None: + parent_workspace = next( + (item for item in self._workspaces if item["id"] == parent_id), None + ) + if parent_workspace is None: + logger.error( + "Parent Workspace with logical ID -> %s not found.", + parent_id, + ) + return False + + parent_workspace_node_id = self.determine_workspace_id(parent_workspace) + if not parent_workspace_node_id: + logger.error( + "Parent Workspace without node ID (parent workspace creation may have failed). 
Skipping to next workspace...", + ) + return False + + logger.debug( + "Parent Workspace with logical ID -> %s has node ID -> %s", + parent_id, + parent_workspace_node_id, + ) + else: + # Alternatively a path could be specified in the payload: + parent_path = workspace.get("parent_path", None) + if parent_path: + logger.info( + "Workspace -> '%s' has parent path -> %s specified in payload.", + name, + parent_path, + ) + response = self._otcs.get_node_by_volume_and_path( + volume_type=141, path=parent_path, create_path=True + ) + parent_workspace_node_id = self._otcs.get_result_value(response, "id") + else: + # if no parent_id is specified the workspace location is determined by the workspace type definition + # and we pass None as parent ID to the get_workspace_create_form and create_workspace methods below: + parent_workspace_node_id = None + logger.info( + "Workspace -> '%s' has no parent path specified in payload.", + name, + ) + + # Find the workspace type with the name given in the payload: + workspace_type = next( + (item for item in self._workspace_types if item["name"] == type_name), + None, + ) + if workspace_type is None: + logger.error( + "Workspace Type -> '%s' not found. Skipping to next workspace...", + type_name, + ) + return False + if workspace_type["templates"] == []: + logger.error( + "Workspace Type -> '%s' does not have templates. Skipping to next workspace...", + type_name, + ) + return False + + # check if the template to be used is specified in the payload: + if "template_name" in workspace: + template_name = workspace["template_name"] + workspace_template = next( + ( + item + for item in workspace_type["templates"] + if item["name"] == template_name + ), + None, + ) + if workspace_template: # does this template exist? + logger.debug( + "Workspace Template -> '%s' has been specified in payload and it does exist.", + template_name, + ) + else: + logger.error( + "Workspace Template -> '%s' has been specified in payload but it doesn't exist!", + template_name, + ) + logger.error( + "Workspace Type -> '%s' has only these templates -> %s", + type_name, + workspace_type["templates"], + ) + return False + # template to be used is NOT specified in the payload - then we just take the first one: + else: + workspace_template = workspace_type["templates"][0] + logger.info( + "Workspace Template has not been specified in payload - we just take the first one (%s)", + workspace_template, + ) + + template_id = workspace_template["id"] + template_name = workspace_template["name"] + workspace_type_id = workspace_type["id"] + + logger.info( + "Create Workspace -> '%s' (type -> '%s') from workspace template -> '%s' (%s)", + name, + type_name, + template_name, + template_id, + ) + + # Handle the case where the workspace is not connected + # to any external system / business object: + if not business_object_list: + business_object_list.append( + { + "ext_system_id": None, + "bo_type": None, + "bo_id": None, + } + ) + + for business_object in business_object_list: + # Read categories from payload: + if not "categories" in workspace: + logger.debug( + "Workspace payload has no category data! 
Will leave category attributes empty...", + ) + workspace_category_data = {} + else: + categories = workspace["categories"] + workspace_category_data = self.prepare_workspace_create_form( + categories=categories, + template_id=template_id, + ext_system_id=business_object["ext_system_id"], + bo_type=business_object["bo_type"], + bo_id=business_object["bo_id"], + parent_workspace_node_id=parent_workspace_node_id, + ) + if not workspace_category_data: + logger.error( + "Failed to prepare the category data for workspace -> '%s'!", + name, + ) + return False + + if ibo_workspace_id: + logger.info( + "Connect existing workspace '%s' to an additional business object '%s' (IBO)", + name, + business_object["bo_type"], + ) + # Create the workspace with all provided information: + response = self._otcs.create_workspace( + workspace_template_id=template_id, + workspace_name=name, + workspace_description=description, + workspace_type=workspace_type_id, + category_data=workspace_category_data, + external_system_id=business_object["ext_system_id"], + bo_type=business_object["bo_type"], + bo_id=business_object["bo_id"], + parent_id=parent_workspace_node_id, + ibo_workspace_id=ibo_workspace_id, + show_error=( + not self._sap + ), # if SAP is active it may produce workspaces concurrently (race condition). Then we don't want to issue errors. + ) + if response is None: + # Check if workspace has been concurrently created by some other + # process (e.g. via SAP or Salesforce). This would be a race condition + # that seems to really occur. + workspace_id = self.determine_workspace_id(workspace) + if workspace_id: + logger.info( + "Workspace -> '%s' of type -> '%s' has been created by an external process and has ID -> %s!", + name, + type_name, + workspace_id, + ) + workspace["nodeId"] = workspace_id + else: + logger.error( + "Failed to create workspace -> '%s' of type -> %s!", + name, + type_name, + ) + return False + else: + # Now we add the node ID of the new workspace to the payload data structure + # This will be reused when creating the workspace relationships! + if not ibo_workspace_id: + workspace["nodeId"] = self._otcs.get_result_value(response, "id") + ibo_workspace_id = workspace["nodeId"] + + # We also get the name the workspace was finally created with. + # This can be different form the name in the payload as additional + # naming conventions from the Workspace Type definitions may apply. + # This is important to make the python container idem-potent. + response = self._otcs.get_workspace(workspace["nodeId"]) + workspace["name"] = self._otcs.get_result_value(response, "name") + + logger.info( + "Successfully created workspace with final name -> '%s' and node ID -> %s", + workspace["name"], + workspace["nodeId"], + ) + + # if the workspace creation has failed - e.g. error in lookup of business + # object in external system then we continue with the next workspace: + if not "nodeId" in workspace: + logger.error( + "Couldn't create the workspace -> '%s'. 
Skipping to next workspace...", + workspace["name"], + ) + return False + + # Check if there's an workspace nickname configured: + if "nickname" in workspace: + nickname = workspace["nickname"] + logger.info( + "Assign nickname '%s' to workspace -> '%s' (%s)...", + nickname, + name, + workspace["nodeId"], + ) + response = self._otcs.set_node_nickname( + node_id=workspace["nodeId"], nickname=nickname, show_error=True + ) + if not response: + logger.error( + "Failed to assign nickname -> '%s' to workspace -> '%s'", + nickname, + name, + ) + + # Check if there's an workspace icon/image configured: + if "image_nickname" in workspace: + image_nickname = workspace["image_nickname"] + + response = self._otcs.get_node_from_nickname(image_nickname) + node_id = self._otcs.get_result_value(response, "id") + if node_id: + mime_type = self._otcs.get_result_value(response, "mime_type") + if not mime_type: + logger.warning("Missing mime type information - assuming image/png") + mime_type = "image/png" + file_path = "/tmp/" + image_nickname + self._otcs.download_document(node_id=node_id, file_path=file_path) + response = self._otcs.update_workspace_icon( + workspace_id=workspace["nodeId"], + file_path=file_path, + file_mimetype=mime_type, + ) + if not response: + logger.error( + "Failed to assign icon -> '%s' to workspace -> '%s' from file -> '%s'", + image_nickname, + name, + file_path, + ) + else: + logger.error( + "Cannot find workspace image with nickname -> '%s' for workspace -> '%s'", + image_nickname, + name, + ) + + # Check if an RM classification is specified for the workspace: + # RM Classification is specified as list of path elements (top-down) + if ( + "rm_classification_path" in workspace + and workspace["rm_classification_path"] != [] + ): + rm_class_node = self._otcs.get_node_by_volume_and_path( + 198, workspace["rm_classification_path"] + ) + rm_class_node_id = self._otcs.get_result_value(rm_class_node, "id") + if rm_class_node_id: + response = self._otcs.assign_rm_classification( + workspace["nodeId"], rm_class_node_id, False + ) + if response is None: + logger.error( + "Failed to assign RM classification -> '%s' (%s) to workspace -> '%s'", + workspace["rm_classification_path"][-1], + rm_class_node_id, + name, + ) + else: + logger.info( + "Assigned RM Classification -> '%s' to workspace -> '%s'", + workspace["rm_classification_path"][-1], + name, + ) + # Check if one or multiple classifications are specified for the workspace + # Classifications are specified as list of path elements (top-down) + if ( + "classification_pathes" in workspace + and workspace["classification_pathes"] != [] + ): + for classification_path in workspace["classification_pathes"]: + class_node = self._otcs.get_node_by_volume_and_path( + 198, classification_path + ) + class_node_id = self._otcs.get_result_value(class_node, "id") + if class_node_id: + response = self._otcs.assign_classification( + workspace["nodeId"], [class_node_id], False + ) + if response is None: + logger.error( + "Failed to assign classification -> '%s' to workspace -> '%s'", + class_node_id, + name, + ) + else: + logger.info( + "Assigned Classification -> '%s' to workspace -> '%s'", + classification_path[-1], + name, + ) + + return True + + # end method definition + + def process_workspaces_worker( + self, + partition: pd.DataFrame, + results: list, + ): + """Multi-threading worker method to process a partition of the workspaces. 
+ + Args: + partition (pd.DataFrame): partition of the workspaces to process + results (list): Mutable (shared) list of all workers to collect the results. + """ + + thread_id = threading.get_ident() + + result = {} + result["thread_id"] = thread_id + result["success"] = True + result["failure_counter"] = 0 + result["success_counter"] = 0 + + # Process all datasets in the partition that was given to the thread: + for index, row in partition.iterrows(): + logger.info( + "Processing data row -> %s to create workspace -> '%s'...", + str(index), + row["name"], + ) + success = self.process_workspace(workspace=row.dropna().to_dict()) + if success: + result["success_counter"] += 1 + else: + logger.error( + "Failed to process row -> %s for workspace -> '%s'", + str(index), + row["name"], + ) + result["failure_counter"] += 1 + result["success"] = False + + results.append(result) + + # end method definition + + def process_workspaces(self, section_name: str = "workspaces") -> bool: + """Process workspaces in payload and create them in Extended ECM. + + Args: + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections like "workspacesPost") + This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + + Side Effects: + Set workspace["nodeId"] to the node ID of the created workspace + """ + + if not self._workspaces: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True + + success: bool = True + + if ENABLE_MULTI_THREADING: + # Create a list to hold the threads + threads = [] + # And another list to collect the results + results = [] + + df = Data(self._workspaces) + + partitions = df.partitionate(THREAD_NUMBER) + + # Create and start a thread for each partition + for index, partition in enumerate(partitions, start=1): + thread = threading.Thread( + name=f"{section_name}_{index:02}", + target=self.thread_wrapper, + args=(self.process_workspaces_worker, partition, results), + ) + # Start a thread executing the process_workspaces_worker() method: + logger.info("Starting Thread -> %s...", str(thread.name)) + thread.start() + threads.append(thread) + + # Wait for all threads to complete: + for thread in threads: + logger.info("Waiting for Thread -> %s to complete...", str(thread.name)) + thread.join() + logger.info("Thread -> %s has completed.", str(thread.name)) + + # Check if all threads have completed without error / failure. + # If there's a single failure in one of the thread results we + # set the 'success' variable to False. + for result in results: + if not result["success"]: + logger.info( + "Thread ID -> %s had %s failures and completed %s workspaces successfully!", + result["thread_id"], + result["failure_counter"], + result["success_counter"], + ) + success = False # mark the complete processing as "failure" for the status file. 
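+ # Threads that processed all their workspaces without a single failure just report their success count: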
+ else: + logger.info( + "Thread ID -> %s completed %s workspaces successfully!", + result["thread_id"], + result["success_counter"], + ) + else: # no multi-threading + for workspace in self._workspaces: + result = self.process_workspace(workspace=workspace) + success = ( + success and result + ) # if a single result is False then mark this in 'success' variable. + + self.write_status_file(success, section_name, self._workspaces) + + return success + + # end method definition + + def process_workspace_relationship(self, workspace: dict) -> bool: + """Worker thread for workspace relationship creation. + + Args: + workspace (dict): Dictionary with payload of a single workspace. + + Returns: + bool: True = Success, False = Failure + """ + + # Read name from payload: + if "name" not in workspace: + return False + name = workspace["name"] + + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in workspace and not workspace["enabled"]: + logger.info("Payload for Workspace -> '%s' is disabled. Skipping...", name) + return True + + # Read relationships from payload: + if "relationships" not in workspace: + logger.debug( + "Workspace -> '%s' has no relationships. Skipping to next workspace...", + name, + ) + return True + + # Check that the workspace actually has a logical ID - + # otherwise we cannot establish the relationship: + if "id" not in workspace: + logger.warning( + "Workspace without ID cannot have a relationship. Skipping to next workspace...", + ) + return False + + workspace_id = workspace["id"] + logger.info("Workspace -> '%s' has relationships - creating...", name) + + # Now determine the actual node ID of the workspace (it has been created above): + workspace_node_id = self.determine_workspace_id(workspace) + if not workspace_node_id: + logger.warning( + "Workspace without node ID cannot have a relationship (workspace creation may have failed). Skipping to next workspace...", + ) + return False + + logger.debug( + "Workspace with logical ID -> %s has node ID -> %s", + workspace_id, + workspace_node_id, + ) + + success: bool = True + + for related_workspace_id in workspace["relationships"]: + # Find the related workspace with the logical ID given in the payload: + related_workspace = next( + ( + item + for item in self._workspaces + if item["id"] == related_workspace_id + ), + None, + ) + if related_workspace is None: + logger.error( + "Related Workspace with logical ID -> %s not found.", + related_workspace_id, + ) + success = False + continue + + if "enabled" in related_workspace and not related_workspace["enabled"]: + logger.info( + "Payload for Related Workspace -> '%s' is disabled. Skipping...", + related_workspace["name"], + ) + continue + + related_workspace_node_id = self.determine_workspace_id(related_workspace) + if not related_workspace_node_id: + logger.warning( + "Related Workspace without node ID (workspace creation may have failed). 
Skipping to next workspace...", + ) + continue + + logger.debug( + "Related Workspace with logical ID -> %s has node ID -> %s", + related_workspace_id, + related_workspace_node_id, + ) + + # Check if relationship does already exists: + response = self._otcs.get_workspace_relationships( + workspace_id=workspace_node_id + ) + + existing_workspace_relationship = self._otcs.exist_result_item( + response, "id", related_workspace_node_id + ) + if existing_workspace_relationship: + logger.info( + "Workspace relationship between workspace ID -> %s and related workspace ID -> %s does already exist. Skipping...", + workspace_node_id, + related_workspace_node_id, + ) + continue + + logger.info( + "Create Workspace Relationship between workspace node ID -> %s and workspace node ID -> %s", + workspace_node_id, + related_workspace_node_id, + ) + + response = self._otcs.create_workspace_relationship( + workspace_node_id, related_workspace_node_id + ) + if not response: + logger.error("Failed to create workspace relationship.") + success = False + else: + logger.info("Successfully created workspace relationship.") + + # end for relationships + + return success + + # end method definition + + def process_workspace_relationships_worker( + self, + partition: pd.DataFrame, + results: list, + ): + """Multi-threading worker method to process a partition of the workspaces + to create workspace relationships. + + Args: + partition (pd.DataFrame): partition of the workspaces to process + results (list): Mutable (shared) list of all workers to collect the results. + """ + + thread_id = threading.get_ident() + + result = {} + result["thread_id"] = thread_id + result["success"] = True + result["failure_counter"] = 0 + result["success_counter"] = 0 + + # Process all datasets in the partion that was given to the thread: + for index, row in partition.iterrows(): + logger.info( + "Processing data row -> %s to create relationships for workspace -> '%s'...", + str(index), + row["name"], + ) + success = self.process_workspace_relationship( + workspace=row.dropna().to_dict() + ) + if success: + result["success_counter"] += 1 + else: + logger.error( + "Failed to process row -> %s for relationships of workspace -> '%s'", + str(index), + row["name"], + ) + result["failure_counter"] += 1 + result["success"] = False + + results.append(result) + + # end method definition + + def process_workspace_relationships( + self, section_name: str = "workspaceRelationships" + ) -> bool: + """Process workspaces relationships in payload and create them in Extended ECM. + + Relationships can only be created if all workspaces have been created before. + Once a workspace got created, the node ID of that workspaces has been added + to the payload["workspaces"] data structure (see process_workspaces()) + Relationships are created between the node IDs of two business workspaces + (and not the logical IDs in the inital payload specification) + + Args: + section_name (str, optional): name of the section. + This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + """ + + if not self._workspaces: + logger.info("Payload section -> '%s' is empty. 
Skipping...", section_name) + return True + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True + + success: bool = True + + if ENABLE_MULTI_THREADING: + # Create a list to hold the threads + threads = [] + # And another list to collect the results + results = [] + + df = Data(self._workspaces) + + partitions = df.partitionate(THREAD_NUMBER) + + # Create and start a thread for each partition + for index, partition in enumerate(partitions, start=1): + thread = threading.Thread( + name=f"{section_name}_{index:02}", + target=self.thread_wrapper, + args=( + self.process_workspace_relationships_worker, + partition, + results, + ), + ) + # start a thread executing the process_bulk_workspaces_worker() mthod below: + logger.info("Starting Thread -> %s...", str(thread.name)) + thread.start() + threads.append(thread) + + # Wait for all threads to complete + for thread in threads: + logger.info("Waiting for Thread -> %s to complete...", str(thread.name)) + thread.join() + logger.info("Thread -> %s has completed.", str(thread.name)) + + # Check if all threads have completed without error / failure. + # If there's a single failure in on of the thread results we + # set 'success' variable to False. + for result in results: + if not result["success"]: + logger.info( + "Thread ID -> %s had %s failures and completed relationships for %s workspaces successfully!", + result["thread_id"], + result["failure_counter"], + result["success_counter"], + ) + success = False # mark the complete processing as "failure" for the status file. + else: + logger.info( + "Thread ID -> %s completed relationships for %s workspace successfully!", + result["thread_id"], + result["success_counter"], + ) + else: # no multi-threading + for workspace in self._workspaces: + result = self.process_workspace_relationship(workspace=workspace) + success = ( + success and result + ) # if a single result is False then mark this in 'success' variable. + + self.write_status_file(success, section_name, self._workspaces) + + return success + + # end method definition + + def process_workspace_members(self, section_name: str = "workspaceMembers") -> bool: + """Process workspaces members in payload and create them in Extended ECM. + + Args: + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections) + This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + """ + + if not self._workspaces: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True + + success: bool = True + + for workspace in self._workspaces: + # Read name from payload (just for logging): + if not "name" in workspace: + continue + workspace_name = workspace["name"] + + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in workspace and not workspace["enabled"]: + logger.info( + "Payload for Workspace -> '%s' is disabled. 
Skipping...", + workspace_name, + ) + continue + + # Read members from payload: + if not "members" in workspace: + logger.info( + "Workspace -> '%s' has no members in payload. Skipping to next workspace...", + workspace_name, + ) + continue + members = workspace["members"] + + workspace_id = workspace["id"] + logger.info( + "Workspace -> '%s' has memberships in payload - establishing...", + workspace_name, + ) + + workspace_node_id = int(self.determine_workspace_id(workspace)) + if not workspace_node_id: + logger.warning( + "Workspace without node ID cannot have members (workspaces creation may have failed). Skipping to next workspace..." + ) + continue + + # now determine the actual node IDs of the workspaces (have been created by process_workspaces()): + workspace_node = self._otcs.get_node(workspace_node_id) + workspace_owner_id = self._otcs.get_result_value( + workspace_node, "owner_user_id" + ) + workspace_owner_name = self._otcs.get_result_value(workspace_node, "owner") + + workspace_roles = self._otcs.get_workspace_roles(workspace_node_id) + if workspace_roles is None: + logger.debug( + "Workspace with ID -> %s and node Id -> %s has no roles. Skipping to next workspace...", + workspace_id, + workspace_node_id, + ) + continue + + # We don't want the workspace creator to be in the leader role + # of automatically created workspaces - this can happen because the + # creator gets added to the leader role automatically: + leader_role_id = self._otcs.lookup_result_value( + workspace_roles, "leader", True, "id" + ) + + if leader_role_id: + leader_role_name = self._otcs.lookup_result_value( + workspace_roles, "leader", True, "name" + ) + response = self._otcs.remove_workspace_member( + workspace_id=workspace_node_id, + role_id=leader_role_id, + member_id=workspace_owner_id, + show_warning=False, + ) + if response: + logger.info( + "Removed creator user -> '%s' (%s) from leader role -> '%s' (%s) of workspace -> '%s'", + workspace_owner_name, + workspace_owner_id, + leader_role_name, + leader_role_id, + workspace_name, + ) + + logger.info( + "Adding members to workspace with ID -> %s and node ID -> %s defined in payload...", + workspace_id, + workspace_node_id, + ) + + for member in members: + # read user list and role name from payload: + member_users = ( + member["users"] if member.get("users") else [] + ) # be careful to avoid key errors as users are optional + member_groups = ( + member["groups"] if member.get("groups") else [] + ) # be careful to avoid key errors as groups are optional + member_role_name = member.get("role", "") + member_clear = member.get("clear", False) + + if member_role_name == "": # role name is required + logger.error( + "Members of workspace -> '%s' is missing the role name.", + workspace_name, + ) + success = False + continue + + role_id = self._otcs.lookup_result_value( + workspace_roles, "name", member_role_name, "id" + ) + if role_id is None: + # if member_role is None: + logger.error( + "Workspace -> '%s' does not have a role with name -> '%s'", + workspace_name, + member_role_name, + ) + success = False + continue + logger.debug("Role -> '%s' has ID -> %s", member_role_name, role_id) + + # check if we want to clear (remove) existing members of this role: + if member_clear: + logger.info( + "Clear existing members of role -> '%s' (%s) for workspace -> '%s' (%s)", + member_role_name, + role_id, + workspace_name, + workspace_id, + ) + self._otcs.remove_workspace_members( + workspace_id=workspace_node_id, role_id=role_id + ) + + if ( + member_users == [] and 
member_groups == [] + ): # we either need users or groups (or both) + logger.debug( + "Role -> '%s' of workspace -> '%s' does not have any members (neither users nor groups).", + member_role_name, + workspace_name, + ) + continue + + # Process users as workspace members: + for member_user in member_users: + # Find the member user in the current payload: + member_user_id = next( + (item for item in self._users if item["name"] == member_user), + {}, + ) + if member_user_id: + user_id = member_user_id["id"] + else: + # If this didn't work, try to get the member user from OTCS. This covers + # cases where the user is system generated or part + # of a former payload processing (thus not in the current payload): + logger.debug( + "Member -> '%s' not found in current payload - check if it exists in OTCS already...", + member_user, + ) + response = self._otcs.get_user(name=member_user) + user_id = self._otcs.lookup_result_value( + response, key="name", value=member_user, return_key="id" + ) + if not user_id: + logger.error( + "Cannot find member user with login -> '%s'. Skipping...", + member_user, + ) + continue + + # Add the member if it does not yet exist - suppress warning + # message if the user is already in the role: + response = self._otcs.add_workspace_member( + workspace_id=workspace_node_id, + role_id=int(role_id), + member_id=user_id, + show_warning=False, + ) + if response is None: + logger.error( + "Failed to add user -> '%s' (%s) as member to role -> '%s' of workspace -> '%s'", + member_user, + user_id, + member_role_name, + workspace_name, + ) + success = False + else: + logger.info( + "Successfully added user -> '%s' (%s) as member to role -> '%s' of workspace -> '%s'", + member_user, + user_id, + member_role_name, + workspace_name, + ) + + # Process groups as workspace members: + for member_group in member_groups: + member_group_id = next( + (item for item in self._groups if item["name"] == member_group), + None, + ) + if member_group_id is None: + logger.error( + "Cannot find group with name -> '%s'", member_group + ) + success = False + continue + group_id = member_group_id["id"] + + response = self._otcs.add_workspace_member( + workspace_id=workspace_node_id, + role_id=int(role_id), + member_id=group_id, + show_warning=False, + ) + if response is None: + logger.error( + "Failed to add group -> '%s' (%s) to role -> '%s' of workspace -> '%s'", + member_group_id["name"], + group_id, + member_role_name, + workspace_name, + ) + success = False + else: + logger.info( + "Successfully added group -> '%s' (%s) to role -> '%s' of workspace -> '%s'", + member_group_id["name"], + group_id, + member_role_name, + workspace_name, + ) + + # Optionally the payload may have a permission list for the role + # to change the default permission from the workspace template + # to something more specific: + member_permissions = member.get("permissions", []) + if member_permissions == []: + logger.debug( + "No permission change for workspace -> '%s' and role -> '%s'", + workspace_name, + member_role_name, + ) + continue + + logger.info( + "Update permissions of workspace -> '%s' (%s) and role -> '%s' to -> %s", + workspace_name, + str(workspace_node_id), + member_role_name, + str(member_permissions), + ) + response = self._otcs.assign_permission( + node_id=workspace_node_id, + assignee_type="custom", + assignee=role_id, + permissions=member_permissions, + apply_to=2, + ) + if not response: + logger.error( + "Failed to update permissions of workspace -> '%s' (%s) and role -> '%s' to -> %s.", 
workspace_name, + str(workspace_node_id), + member_role_name, + str(member_permissions), + ) + success = False + + self.write_status_file(success, section_name, self._workspaces) + + return success + + # end method definition + + def process_workspace_member_permissions( + self, section_name: str = "workspaceMemberPermissions" + ) -> bool: + """Process workspace members in payload and set their permissions. + We need this separate from process_workspace_members() which also + sets permissions (if in payload), as we add documents to workspaces with + content transports and these documents don't inherit role permissions + (this is a transport limitation). + + Args: + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections) + This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + """ + + if not self._workspaces: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True + + success: bool = True + + for workspace in self._workspaces: + # Read name from payload (just for logging): + if "name" not in workspace: + continue + workspace_name = workspace["name"] + + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in workspace and not workspace["enabled"]: + logger.info( + "Payload for Workspace -> '%s' is disabled. Skipping...", + workspace_name, + ) + continue + + # Read members from payload: + if "members" not in workspace: + logger.info( + "Workspace -> '%s' has no members in payload. No need to update permissions. Skipping to next workspace...", + workspace_name, + ) + continue + members = workspace["members"] + + workspace_id = workspace["id"] + workspace_node_id = self.determine_workspace_id(workspace) + if not workspace_node_id: + logger.warning( + "Workspace without node ID cannot get permission changes (workspace creation may have failed). Skipping to next workspace..." + ) + continue + workspace_node_id = int(workspace_node_id) + + workspace_roles = self._otcs.get_workspace_roles(workspace_node_id) + if workspace_roles is None: + logger.info( + "Workspace with ID -> %s and node ID -> %s has no roles to update permissions. 
Skipping to next workspace...", + workspace_id, + workspace_node_id, + ) + continue + + for member in members: + # read user list and role name from payload: + member_users = ( + member["users"] if member.get("users") else [] + ) # be careful to avoid key errors as users are optional + member_groups = ( + member["groups"] if member.get("groups") else [] + ) # be careful to avoid key errors as groups are optional + member_role_name = member["role"] + + if member_role_name == "": # role name is required + logger.error( + "Members of workspace -> '%s' is missing the role name.", + workspace_name, + ) + success = False + continue + if ( + member_users == [] and member_groups == [] + ): # we either need users or groups (or both) + logger.debug( + "Role -> '%s' of workspace -> '%s' does not have any members (no users nor groups).", + member_role_name, + workspace_name, + ) + continue + + role_id = self._otcs.lookup_result_value( + workspace_roles, "name", member_role_name, "id" + ) + if role_id is None: + logger.error( + "Workspace -> '%s' does not have a role with name -> '%s'", + workspace_name, + member_role_name, + ) + success = False + continue + logger.debug("Role -> '%s' has ID -> %s", member_role_name, role_id) + + member_permissions = member.get("permissions", []) + if member_permissions == []: + logger.debug( + "No permission change for workspace -> '%s' and role -> '%s'.", + workspace_name, + member_role_name, + ) + continue + + logger.info( + "Update permissions of workspace -> '%s' (%s) and role -> '%s' to -> %s", + workspace_name, + str(workspace_node_id), + member_role_name, + str(member_permissions), + ) + response = self._otcs.assign_permission( + node_id=workspace_node_id, + assignee_type="custom", + assignee=role_id, + permissions=member_permissions, + apply_to=2, + ) + if not response: + logger.error( + "Failed to update permissions of workspace -> '%s' (%s) and role -> '%s' to -> %s.", + workspace_name, + str(workspace_node_id), + member_role_name, + str(member_permissions), + ) + success = False + + self.write_status_file(success, section_name, self._workspaces) + + return success + + # end method definition + + def process_workspace_aviators( + self, section_name: str = "workspaceAviators" + ) -> bool: + """Process workspaces Content Aviator settings in payload and enable Aviator for selected workspaces. + + Args: + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections) + This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + """ + + if not self._workspaces: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True + + success: bool = True + + for workspace in self._workspaces: + # Read name from payload (just for logging): + if not "name" in workspace: + continue + workspace_name = workspace["name"] + + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in workspace and not workspace["enabled"]: + logger.info( + "Payload for Workspace -> '%s' is disabled. 
Skipping...", + workspace_name, + ) + continue + + # Read Aviator setting from payload: + if not "enable_aviator" in workspace or not workspace["enable_aviator"]: + logger.info( + "Aviator is not enabled for Workspace -> '%s'. Skipping to next workspace...", + workspace_name, + ) + continue + + # We cannot just lookup with workspace.get("nodeId") as the customizer + # may have been restarted inbetween - so we use our proper determine_workspace_id + # here... + workspace_id = self.determine_workspace_id(workspace) + if not workspace_id: + logger.error( + "Cannot find node ID for workspace -> '%s'. Workspace creation may have failed. Skipping to next workspace...", + workspace_name, + ) + success = False + continue + + # Make code idem-potent and check if Aviator is already enabled + # for this workspace: + if self._otcs.check_workspace_aviator(workspace_id=workspace_id): + logger.info( + "Skip workspace -> '%s' (%s) as Aviator is already enabled...", + workspace_name, + workspace_id, + ) + continue + + # Now enable the Content Aviator for the workspace: + response = self._otcs.update_workspace_aviator(workspace_id, True) + if not response: + logger.error( + "Failed to enable Content Aviator for workspace -> '%s' (%s)", + workspace_name, + workspace_id, + ) + success = False + continue + + self.write_status_file(success, section_name, self._workspaces) + + return success + + # end method definition + + def process_web_reports( + self, web_reports: list, section_name: str = "webReports" + ) -> bool: + """Process web reports in payload and run them in Extended ECM. + + Args: + web_reports (list): list of web reports. As we have two different list (pre and post) + we need to pass the actual list as parameter. + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections like "webReportsPost") + This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if a restart of the OTCS pods is required. False otherwise. + """ + + if not web_reports: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return False # important to return False here as otherwise we are triggering a restart of services!! + + # If this payload section has been processed successfully before we + # can return False and skip processing it once more: + if self.check_status_file(section_name): + return False # important to return False here as otherwise we are triggering a restart of services!! + + restart_required: bool = False + success: bool = True + + for web_report in web_reports: + nick_name = web_report.get("nickname", None) + if not nick_name: + logger.error("Web Report payload needs a nickname! Skipping...") + continue + + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in web_report and not web_report["enabled"]: + logger.info( + "Payload for Web Report -> '%s' is disabled. Skipping...", nick_name + ) + continue + + description = web_report.get("description", "") + restart = web_report.get("restart", False) + + if not self._otcs.get_node_from_nickname(nick_name): + logger.error( + "Web Report with nickname -> '%s' does not exist! 
Skipping...", + nick_name, + ) + success = False + continue + + # be careful to avoid key errors as Web Report parameters are optional: + actual_params = ( + web_report["parameters"] if web_report.get("parameters") else {} + ) + formal_params = self._otcs.get_web_report_parameters(nick_name) + if actual_params: + logger.info( + "Running Web Report -> '%s' (%s) with parameters -> %s ...", + nick_name, + description, + actual_params, + ) + # Do some sanity checks to see if the formal and actual parameters are matching... + # Check 1: are there formal parameters at all? + if not formal_params: + logger.error( + "Web Report -> '%s' is called with actual parameters but it does not expect parameters! Skipping...", + nick_name, + ) + success = False + continue + lets_continue = False + # Check 2: Iterate through the actual parameters given in the payload + # and see if there's a matching formal parameter expected by the Web Report: + for key, value in actual_params.items(): + # Check if there's a matching formal parameter defined on the Web Report node: + formal_param = next( + (item for item in formal_params if item["parm_name"] == key), + None, + ) + if formal_param is None: + logger.error( + "Web Report -> '%s' is called with parameter -> '%s' that is not expected! Value: %s) Skipping...", + nick_name, + key, + value, + ) + success = False + lets_continue = True # we cannot do a "continue" here directly as we are in an inner loop + # Check 3: Iterate through the formal parameters and validate there's a matching + # actual parameter defined in the payload for each mandatory formal parameter + # that does not have a default value: + for formal_param in formal_params: + if ( + (formal_param["mandatory"] is True) + and (formal_param["default_value"] is None) + and not actual_params.get(formal_param["parm_name"]) + ): + logger.error( + "Web Report -> '%s' is called without mandatory parameter -> %s! Skipping...", + nick_name, + formal_param["parm_name"], + ) + success = False + lets_continue = True # we cannot do a "continue" here directly as we are in an inner loop + # Did any of the checks fail? + if lets_continue: + continue + # Actual parameters are validated, we can run the Web Report: + response = self._otcs.run_web_report(nick_name, actual_params) + else: + logger.info( + "Running Web Report -> '%s' (%s) without parameters...", + nick_name, + description, + ) + # Check if there's a formal parameter that is mandatory but + # does not have a default value: + if formal_params: + required_param = next( + ( + item + for item in formal_params + if (item["mandatory"] is True) + and (not item["default_value"]) + ), + None, + ) + if required_param: + logger.error( + "Web Report -> '%s' is called without parameters but has a mandatory parameter -> '%s' without a default value! Skipping...", + nick_name, + required_param["parm_name"], + ) + success = False + continue + else: # we are good to proceed! 
+ logger.debug( + "Web Report -> '%s' does not have a mandatory parameter without a default value!", + nick_name, + ) + response = self._otcs.run_web_report(nick_name) + if response is None: + logger.error( + "Failed to run Web Report with nickname -> '%s'", nick_name + ) + success = False + + if restart: + restart_required = True + + self.write_status_file(success, section_name, web_reports) + + return restart_required + + # end method definition + + def process_cs_applications( + self, otcs_object: OTCS, section_name: str = "csApplications" + ) -> bool: + """Process CS applications in payload and install them in Extended ECM. + The CS Applications need to be installed in all frontends and backends. + + Args: + otcs_object (OTCS): this can either be the OTCS frontend or OTCS backend. If None + then the otcs_backend is used. + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections) + This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + """ + + if not self._cs_applications: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True + + success: bool = True + + # OTCS backend is the default: + if not otcs_object: + otcs_object = self._otcs_backend + + for cs_application in self._cs_applications: + application_name = cs_application["name"] + + # Check if element has been disabled in payload (enabled = false). 
+ # In this case we skip the element: + if "enabled" in cs_application and not cs_application["enabled"]: + logger.info( + "Payload for CS Application -> '%s' is disabled. Skipping...", + application_name, + ) + continue + + application_description = cs_application["description"] + + logger.info( + "Install CS Application -> '%s' (%s)...", + application_name, + application_description, + ) + response = otcs_object.install_cs_application(application_name) + if response is None: + logger.error( + "Failed to install CS Application -> '%s'!", application_name + ) + success = False + + self.write_status_file(success, section_name, self._cs_applications) + + return success + + # end method definition + + def process_user_settings(self, section_name: str = "userSettings") -> bool: + """Process user settings in payload and apply them in OTDS. + This includes password settings and user display settings. + + Args: + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections) + This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + """ + + if not self._users: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True + + success: bool = True + + for user in self._users: + user_name = user["name"] + + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in user and not user["enabled"]: + logger.info( + "Payload for User -> '%s' is disabled. Skipping...", user_name + ) + continue + + user_partition = self._otcs.config()["partition"] + if not user_partition: + logger.error("User partition not found!") + success = False + continue + + # Set the OTDS display name. Extended ECM does not use this but + # it makes AppWorks display users correctly (and it doesn't hurt). + # We only set this if firstname _and_ lastname are in the payload: + if "firstname" in user and "lastname" in user: + user_display_name = user["firstname"] + " " + user["lastname"] + response = self._otds.update_user( + user_partition, user_name, "displayName", user_display_name + ) + if response: + logger.info( + "Display name for user -> '%s' has been updated to -> '%s'", + user_name, + user_display_name, + ) + else: + logger.error( + "Display name for user -> '%s' could not be updated to -> '%s'", + user_name, + user_display_name, + ) + success = False + + # Don't force the user to reset the password at first login (settings in OTDS): + logger.info("Don't enforce password change for user -> '%s'...", user_name) + response = self._otds.update_user( + user_partition, user_name, "UserMustChangePasswordAtNextSignIn", "False" + ) + if not response: + success = False + + response = self._otds.update_user( + user_partition, user_name, "UserCannotChangePassword", "True" + ) + if not response: + success = False + + # Set the user password to never expire: + response = self._otds.update_user( + user_partition, user_name, "PasswordNeverExpires", "True" + ) + if not response: + success = False + + self.write_status_file(success, section_name, self._users) + + return success + + # end method definition + + def process_user_favorites_and_profiles( + self, section_name: str = "userFavoritesAndProfiles" + ) -> bool: + """Process user favorites in payload and create them in Extended ECM. + This method also simulates browsing the favorites to populate the + widgets on the landing pages and sets personal preferences. + + Args: + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections) + This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + """ + + if not self._users: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True + + success: bool = True + + # We can only set favorites if we impersonate / authenticate as the user. + # The following code (for loop) will change the authenticated user - we need to + # switch it back to the admin user later, so we save the admin credentials for this: + + if self._users: + # Save admin credentials for the later switch back to the admin user: + admin_credentials = self._otcs.credentials() + else: + admin_credentials = {} + + for user in self._users: + user_name = user["name"] + + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in user and not user["enabled"]: + logger.info( + "Payload for User -> '%s' is disabled. 
Skipping...", user_name + ) + continue + + user_password = user["password"] + + # we change the otcs credentials to the user: + self._otcs.set_credentials(user_name, user_password) + + # we re-authenticate as the user: + logger.info("Authenticate user -> '%s'...", user_name) + # True = force new login with new user + cookie = self._otcs.authenticate(revalidate=True) + if not cookie: + logger.error("Couldn't authenticate user -> %s", user_name) + success = False + continue + + # we update the user profile to activate responsive (dynamic) containers: + response = self._otcs.update_user_profile( + field="responsiveContainerMode", + value=True, + config_section="SmartUI", + ) + if response is None: + logger.warning( + "Profile for user -> '%s' couldn't be updated with responsive container mode'!", + user_name, + ) + else: + logger.info( + "Profile for user -> '%s' has been updated to enable responsive container mode.", + user_name, + ) + response = self._otcs.update_user_profile( + field="responsiveContainerMessageMode", + value=True, + config_section="SmartUI", + ) + if response is None: + logger.warning( + "Profile for user -> '%s' couldn't be updated with responsive container message mode'!", + user_name, + ) + else: + logger.info( + "Profile for user -> '%s' has been updated to enable messages for responsive container mode.", + user_name, + ) - # need to use row - 1 as index starts with 0 but payload rows start with 1 - set_data_attributes[row - 1][set_attr_schema] = "" - else: - logger.info( - "Set -> %s, Attribute -> %s, Row -> %s found in payload, value -> %s", - set_name, - set_attr_name, - row, - attribute["value"], - ) - # Put the value from the payload into data structure - # need to use row - 1 as index starts with 0 but payload rows start with 1 - set_data_attributes[row - 1][set_attr_schema] = ( - attribute["value"] - ) - row += 1 # continue the while loop with the next row - # Check if it is single-line set: - elif attr_type == "object": - set_name = schema_attributes[attr_schema]["title"] - logger.info("Single-line Set -> %s", set_name) - set_data_attributes = data_attributes[attr_data] - logger.debug("Set Data Attributes -> %s", set_data_attributes) + restrict_personal_workspace = user.get("restrict_personal_workspace", False) + if restrict_personal_workspace: + # We let the user restrict itself to have read-only access to its + # personal workspace: + node = self._otcs.get_node_by_volume_and_path(142, []) + node_id = self._otcs.get_result_value(node, "id") + if node_id: + logger.info( + "Restricting Personal Workspace of user -> '%s' to read-only.", + user_name, + ) + response = self._otcs.assign_permission( + int(node_id), "owner", 0, ["see", "see_contents"], 2 + ) - set_schema_attributes = schema_attributes[attr_schema]["properties"] - logger.debug("Set Schema Attributes -> %s", set_schema_attributes) - for set_attr_data, set_attr_schema in zip( - set_data_attributes, set_schema_attributes - ): - logger.debug("Set Attribute ID -> %s", set_attr_data) - logger.debug( - "Set Attribute Data -> %s", - set_data_attributes[set_attr_data], - ) - logger.debug( - "Set Attribute Schema -> %s", - set_schema_attributes[set_attr_schema], - ) - set_attr_type = set_schema_attributes[set_attr_schema]["type"] - logger.debug("Set Attribute Type -> %s", set_attr_type) - set_attr_name = set_schema_attributes[set_attr_schema]["title"] - logger.debug("Set Attribute Name -> %s", set_attr_name) - # Lookup the attribute with the right category, set and attribute name in payload: - attribute = next( 
- ( - item - for item in categories - if ( - item["name"] == cat_name - and "set" - in item # not all items may have a "set" key - and item["set"] == set_name - and item["attribute"] == set_attr_name - ) - ), - None, + # we work through the list of favorites defined for the user: + favorites = user.get("favorites", []) + for favorite in favorites: + # check if favorite is a logical workspace name + favorite_item = next( + (item for item in self._workspaces if item["id"] == favorite), None + ) + is_workspace = False + if favorite_item: + logger.info( + "Found favorite item (workspace) in payload -> %s", + favorite_item["name"], + ) + favorite_id = self.determine_workspace_id(favorite_item) + if not favorite_id: + logger.warning( + "Workspace of type -> '%s' and name -> '%s' does not exist. Cannot create favorite. Skipping...", + favorite_item["type_name"], + favorite_item["name"], ) - if attribute is None: - logger.warning( - "Set -> %s, Attribute -> %s not found in payload.", - set_name, - set_attr_name, - ) - set_data_attributes[set_attr_data] = "" - else: - logger.info( - "Set -> %s, Attribute -> %s found in payload, value -> %s", - set_name, - set_attr_name, - attribute["value"], - ) - # Put the value from the payload into data structure - set_data_attributes[set_attr_data] = attribute["value"] - # It is a plain attribute (not inside a set) or it is a multi-value attribute (not inside a set): + continue + + is_workspace = True else: - attr_name = schema_attributes[attr_schema]["title"] - logger.debug("Attribute Name -> %s", attr_name) - # Lookup the attribute with the right category and attribute name in payload: - attribute = next( - ( - item - for item in categories - if ( - item["name"] == cat_name - and item["attribute"] == attr_name - ) - ), - None, - ) - if attribute is None: + # alternatively try to find the item as a nickname: + favorite_item = self._otcs.get_node_from_nickname(favorite) + favorite_id = self._otcs.get_result_value(favorite_item, "id") + favorite_type = self._otcs.get_result_value(favorite_item, "type") + + if favorite_type == 848: + is_workspace = True + + # if favorite_item is None: + if favorite_id is None: logger.warning( - "Attribute -> %s not found in payload.", attr_name + "Favorite -> '%s' neither found as workspace ID nor as nickname. Skipping to next favorite...", + favorite, ) - data_attributes[attr_data] = "" + continue + + response = self._otcs.add_favorite(favorite_id) + if response is None: + logger.warning( + "Favorite ID -> %s couldn't be added for user -> %s!", + favorite_id, + user_name, + ) + else: + logger.info( + "Added favorite for user -> %s, node ID -> %s.", + user_name, + favorite_id, + ) + logger.info( + "Simulate user -> '%s' browsing node ID -> %s.", + user_name, + favorite_id, + ) + # simulate a browse by the user to populate recently accessed items + if is_workspace: + response = self._otcs.get_workspace(favorite_id) else: - logger.info( - "Attribute -> %s found in payload, value -> %s", - attr_name, - attribute["value"], - ) - # We need to handle a very special case here for Extended ECM for Government - # which has an attribute type "Organizational Unit" (OU). This is referring to a group ID - # which is not stable across deployments. So we need to lookup the Group ID and add it - # to the data structure. This expects that the payload has the Group Name and not the Group ID - if attr_type == str(11480): - logger.info( - "Attribute -> %s is is of type -> Organizational Unit (%s). 
Looking up group ID for group name -> %s", - attr_name, - attr_type, - attribute["value"], - ) - group = self._otcs.get_group(attribute["value"]) - group_id = self._otcs.lookup_result_value( - group, "name", attribute["value"], "id" - ) + response = self._otcs.get_node(favorite_id) - if group_id: - logger.info( - "Group for Organizational Unit -> %s has ID -> %s", - attribute["value"], - group_id, - ) - # Put the group ID into data structure - data_attributes[attr_data] = str(group_id) - else: - logger.error( - "Group for Organizational Unit -> %s does not exist!", - attribute["value"], - ) - # Clear the value to avoid workspace create failure - data_attributes[attr_data] = "" - # handle special case where attribute type is a user picker. - # we expect that the payload includes the login name for this - # (as user IDs are not stable across systems) but then we need - # to lookup the real user ID here: - elif attr_type == "otcs_user_picker": - logger.info( - "Attribute -> %s is is of type -> User Picker (%s). Looking up user ID for user login name -> %s", - attr_name, - attr_type, - attribute["value"], - ) - user = self._otcs.get_user(attribute["value"]) - user_id = self._otcs.lookup_result_value( - response=user, - key="name", - value=attribute["value"], - return_key="id", - ) - if user_id: - # User has been found - determine ID: - logger.info( - "User -> %s has ID -> %s", - attribute["value"], - user_id, - ) - # Put the user ID into data structure - data_attributes[attr_data] = str(user_id) - else: - logger.error( - "User with login name -> %s does not exist!", - attribute["value"], - ) - # Clear the value to avoid workspace create failure - data_attributes[attr_data] = "" - else: - # Put the value from the payload into data structure - data_attributes[attr_data] = attribute["value"] - category_create_data["categories"][cat_data] = data_attributes + # we work through the list of proxies defined for the user + # (we need to consider that not all users have the proxies element): + proxies = user["proxies"] if user.get("proxies") else [] + + for proxy in proxies: + proxy_user = next( + (item for item in self._users if item["name"] == proxy), + None, + ) + if not proxy_user or not "id" in proxy_user: + logger.error( + "The proxy -> '%s' for user -> '%s' does not exist! Skipping proxy...", + proxy, + user_name, + ) + success = False + continue + proxy_user_id = proxy_user["id"] + + # Check if the proxy is already set: + if not self._otcs.is_proxy(proxy): + logger.info( + "Set user -> '%s' (%s) as proxy for user -> '%s'.", + proxy, + proxy_user_id, + user_name, + ) + # set the user proxy - currently we don't support time based proxies in payload. + # The called method is ready to support this. + response = self._otcs.add_user_proxy(proxy_user_id) + else: + logger.info( + "User -> '%s' (%s) is already proxy for user -> '%s'. 
Skipping...", + proxy, + proxy_user_id, + user_name, + ) + if self._users: + # Set back admin credentials: + self._otcs.set_credentials( + admin_credentials["username"], admin_credentials["password"] + ) - logger.debug("Category Create Data -> %s", category_create_data) + # we re-authenticate as the admin user: + logger.info( + "Authenticate as admin user -> '%s'...", admin_credentials["username"] + ) + # True = force new login with new user + cookie = self._otcs.authenticate(revalidate=True) - return category_create_data + # Also for the admin user we want to update the user profile to activate responsive (dynamic) containers: + response = self._otcs.update_user_profile( + field="responsiveContainerMode", + value=True, + config_section="SmartUI", + ) + if response is None: + logger.warning( + "Profile for admin user couldn't be updated with responsive container mode'!", + ) + else: + logger.info( + "Profile for admin user has been updated to enable responsive container mode.", + ) + response = self._otcs.update_user_profile( + field="responsiveContainerMessageMode", + value=True, + config_section="SmartUI", + ) + if response is None: + logger.warning( + "Profile for admin user couldn't be updated with responsive container message mode'!", + ) + else: + logger.info( + "Profile for admin user has been updated to enable messages for responsive container mode.", + ) - # end method definition + self.write_status_file(success, section_name, self._users) - def get_salesforce_business_object( - self, workspace: dict, object_type: str, search_field: str, search_value: str - ) -> str | None: - """_summary_ + return success + + # end method definition + + def process_security_clearances( + self, section_name: str = "securityClearances" + ) -> bool: + """Process Security Clearances for Extended ECM. Args: - workspace (dict): Workspace payload - object_type (str): Business Object Type - search_field (str): Search field to find business object in external system. - search_value (str): Search value to find business object in external system. + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections) + This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + """ + + if not self._security_clearances: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True + + success: bool = True + + for security_clearance in self._security_clearances: + if not "level" in security_clearance: + logger.error( + "Security Clearance requires a level in the payload. Skipping." + ) + continue + clearance_level = security_clearance.get("level") + if not "name" in security_clearance: + logger.error( + "Security Clearance requires a name in the payload. Skipping." + ) + continue + clearance_name = security_clearance.get("name") + + if "enabled" in security_clearance and not security_clearance["enabled"]: + logger.info( + "Payload for Security Clearance -> '%s' is disabled. 
Skipping...", + clearance_name, + ) + continue + + clearance_description = security_clearance.get("description") + if not clearance_description: + clearance_description = "" + if clearance_level and clearance_name: + logger.info( + "Creating Security Clearance -> '%s' : %s", + clearance_level, + clearance_name, + ) + self._otcs.run_web_report( + "web_report_security_clearance", security_clearance + ) + else: + logger.error( + "Cannot create Security Clearance - either level or name is missing!" + ) + success = False + + self.write_status_file(success, section_name, self._security_clearances) + + return success + + # end method definition + + def process_supplemental_markings( + self, section_name: str = "supplementalMarkings" + ) -> bool: + """Process Supplemental Markings for Extended ECM. + Args: + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections) + This name is also used for the "success" status + files written to the Admin Personal Workspace Returns: - str | None: technical ID of the business object + bool: True if payload has been processed without errors, False otherwise """ - if not self._salesforce: - logger.error( - "Salesforce connection not initialized! Cannot connect to Salesforce API!" - ) - return None + if not self._supplemental_markings: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + return True - logger.info( - "Workspaces is connected to Salesforce and we need to lookup the BO ID..." - ) - salesforce_token = self._salesforce.authenticate() - if not salesforce_token: - logger.error("Failed to authenticate with Salesforce!") - return None + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True - response = self._salesforce.get_object( - object_type=object_type, - search_field=search_field, - search_value=search_value, - result_fields=["Id"], - ) - bo_id = self._salesforce.get_result_value(response, "Id") - if not bo_id: - logger.warning( - "Business object of type -> %s and %s = %s does not exist in Salesforce!", - object_type, - search_field, - search_value, - ) - logger.info("We try to create the Salesforce object...") + success: bool = True - # Geta helper dict to quickly lookup Salesforce properties - # for given set + attribute name: - property_lookup = self.get_business_object_properties( - bo_type_name=object_type - ) - categories = workspace.get("categories", []) - parameter_dict = {} - # We process all category entries in workspace payload - # and see if we have a matching mapping to a business property - # in the BO Type definition: - for category in categories: - # generate the lookup key: - key = "" - if "set" in category: - key += category["set"] + "-" - key += category.get("attribute") - # get the attribute value: - value = category.get("value") - # lookup the mapping - mapping = property_lookup.get(key, None) - # Check if we have a mapping: - if mapping: - property_name = mapping.get("@propertyName", None) - logger.info( - "Found business property -> %s for attribute -> %s", - property_name, - category.get("attribute"), - ) - parameter_dict[property_name] = value - else: - logger.info( - "Attribute -> %s (key -> %s) does not have a mapped business property.", - category.get("attribute"), - key, - ) + for supplemental_marking in self._supplemental_markings: + code = supplemental_marking.get("code") 
-        if not parameter_dict:
-            logger.warning("Cannot create Salesforce object - no parameters found")
-            return None
+            if (
+                "enabled" in supplemental_marking
+                and not supplemental_marking["enabled"]
+            ):
+                logger.info(
+                    "Payload for Supplemental Marking -> '%s' is disabled. Skipping...",
+                    code,
+                )
+                continue
+
+            description = supplemental_marking.get("description")
+            if not description:
+                description = ""
+            if code:
+                logger.info(
+                    "Creating Supplemental Marking -> '%s' : %s", code, description
+                )
+                self._otcs.run_web_report(
+                    "web_report_supplemental_marking", supplemental_marking
+                )
+            else:
+                logger.error(
+                    "Cannot create Supplemental Marking - the code is missing in the payload!"
+                )
+                success = False
+
+        self.write_status_file(success, section_name, self._supplemental_markings)
+
+        return success
+
+    # end method definition
+
+    def process_user_security(self, section_name: str = "userSecurity") -> bool:
+        """Process Security Clearance and Supplemental Markings for Extended ECM users.
+
+        Args:
+            section_name (str, optional): name of the section. It can be overridden
+                                          for cases where multiple sections of same type
+                                          are used (e.g. the "Post" sections)
+                                          This name is also used for the "success" status
+                                          files written to the Admin Personal Workspace
+        Returns:
+            bool: True if payload has been processed without errors, False otherwise
+        """
+
+        if not self._users:
+            logger.info("Payload section -> '%s' is empty. Skipping...", section_name)
+            return True
+
+        # If this payload section has been processed successfully before we
+        # can return True and skip processing it once more:
+        if self.check_status_file(section_name):
+            return True
+
+        success: bool = True
+
+        for user in self._users:
+            user_id = user.get("id")
+            user_name = user.get("name")
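+            # Illustrative sketch of a user payload element as far as this
+            # method is concerned (values are made up; "id" gets resolved
+            # during the preceding user processing):
+            #   {
+            #       "name": "pfranklin",
+            #       "id": 12345,
+            #       "enabled": True,
+            #       "security_clearance": 95,
+            #       "supplemental_markings": ["EU-ONLY"],
+            #   }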
-        logger.info(
-            "Create Salesforce object of type -> %s with parameters -> %s",
-            object_type,
-            str(parameter_dict),
-        )
-        #
-        # Now we try to create the Salesforce object
-        #
-        response = self._salesforce.add_object(
-            object_type=object_type, **parameter_dict
-        )
-        bo_id = self._salesforce.get_result_value(response, "id")
-        if bo_id:
-            logger.info(
-                "Created Salesforce business object with ID -> %s of type -> %s ",
-                bo_id,
-                object_type,
-            )
-        else:
-            logger.error(
-                "Failed to create Salesforce business object of type -> %s",
-                object_type,
-            )
-        else:
-            logger.info(
-                "Retrieved ID -> %s for Salesforce object type -> %s (looking up -> %s in field -> %s)",
-                bo_id,
-                object_type,
-                search_field,
-                search_value,
-            )
-        return bo_id
+            # Check if element has been disabled in payload (enabled = false).
+            # In this case we skip the element:
+            if "enabled" in user and not user["enabled"]:
+                logger.info(
+                    "Payload for User -> '%s' is disabled. Skipping...", user_name
+                )
+                continue
+
+            # Read security clearance from user payload (it is optional!)
+            user_security_clearance = user.get("security_clearance")
+            if user_id and user_security_clearance:
+                self._otcs.assign_user_security_clearance(
+                    user_id, user_security_clearance
+                )
+
+            # Read supplemental markings from user payload (it is optional!)
+            user_supplemental_markings = user.get("supplemental_markings")
+            if user_id and user_supplemental_markings:
+                self._otcs.assign_user_supplemental_markings(
+                    user_id, user_supplemental_markings
+                )
-    # end method definition
+        self.write_status_file(success, section_name, self._users)

-        This supports multiple external system connections. This methods
-        also checks if the external system is reachable and tries to create
-        missing business objects in the leading system if they are missing.
+        return success

-        Args:
-            workspace (dict): Payload data for the Workspace
-            business_objects (list): Payload data for the business object connections.
+    # end method definition

+    def process_records_management_settings(
+        self, section_name: str = "recordsManagementSettings"
+    ) -> bool:
+        """Process Records Management Settings for Extended ECM.
+        The setting files need to be placed into the OTCS file system via
+        a transport into the Support Asset Volume.
+
+        Args:
+            section_name (str, optional): name of the section. It can be overridden
+                                          for cases where multiple sections of same type
+                                          are used (e.g. the "Post" sections)
+                                          This name is also used for the "success" status
+                                          files written to the Admin Personal Workspace
         Returns:
-            list | None: list of business object data connections (dicts)
+            bool: True if payload has been processed without errors, False otherwise
        """

-        business_object_list = []
+        if not self._records_management_settings:
+            logger.info("Payload section -> '%s' is empty. Skipping...", section_name)
+            return True

-        for business_object_data in business_objects:
-            business_object = {}
+        # If this payload section has been processed successfully before we
+        # can return True and skip processing it once more:
+        if self.check_status_file(section_name):
+            return True

-            name = workspace.get("name")
+        success: bool = True

-            # Read business object data from workspace payload.
-            # business_object_data is a dict with 3-5 elements:
-            if "external_system" in business_object_data:
-                ext_system_id = business_object_data["external_system"]
-            else:
-                logger.error(
-                    "Missing External System in Business Object payload for workspace -> %s.",
-                    name,
-                )
-                continue
-            if "bo_type" in business_object_data:
-                bo_type = business_object_data["bo_type"]
-            else:
-                logger.error(
-                    "Missing Type in Business Object payload for workspace -> %s.",
-                    name,
-                )
-                continue
+        if (
+            "records_management_system_settings" in self._records_management_settings
+            and self._records_management_settings["records_management_system_settings"]
+            != ""
+        ):
+            filename = (
+                self._custom_settings_dir
+                + self._records_management_settings[
+                    "records_management_system_settings"
+                ]
+            )
+            response = self._otcs.import_records_management_settings(filename)
+            if not response:
+                success = False

-            if "bo_id" in business_object_data:
-                bo_id = business_object_data["bo_id"]
-                bo_search_field = None
-                bo_search_value = None
-            elif (
-                not "bo_search_field" in business_object_data
-                or not "bo_search_value" in business_object_data
-            ):
-                logger.error(
-                    "Missing BO search fields (bo_search_field, bo_search_value) in Business Object payload for workspace -> %s.",
-                    name,
-                )
-                continue
-            else:
-                bo_search_field = business_object_data["bo_search_field"]
-                bo_search_value = business_object_data["bo_search_value"]
-                bo_id = None
+        if (
+            "records_management_codes" in self._records_management_settings
+            and self._records_management_settings["records_management_codes"] != ""
+        ):
+            filename = (
+                self._custom_settings_dir
+                + self._records_management_settings["records_management_codes"]
+            )
+            response = self._otcs.import_records_management_codes(filename)
+            if not response:
+                success = False

-            # Check if external system has been declared in payload:
-            external_system = next(
-                (
-                    item
-                    for item in self._external_systems
-                    if 
(item["external_system_name"] == ext_system_id) - ), - None, + if ( + "records_management_rsis" in self._records_management_settings + and self._records_management_settings["records_management_rsis"] != "" + ): + filename = ( + self._custom_settings_dir + + self._records_management_settings["records_management_rsis"] ) + response = self._otcs.import_records_management_rsis(filename) + if not response: + success = False - if not external_system: - logger.warning( - "External System -> %s does not exist. Cannot connect workspace -> %s to -> %s. Create workspace without connection.", - ext_system_id, - name, - ext_system_id, - ) - continue - elif not external_system.get("reachable"): - logger.warning( - "External System -> %s is not reachable. Cannot connect workspace -> %s to -> (%s, %s, %s, %s, %s). Create workspace without connection...", - ext_system_id, - name, - ext_system_id, - bo_type, - bo_id, - bo_search_field, - bo_search_value, - ) - continue - external_system_type = external_system.get("external_system_type", "") + if ( + "physical_objects_system_settings" in self._records_management_settings + and self._records_management_settings["physical_objects_system_settings"] + != "" + ): + filename = ( + self._custom_settings_dir + + self._records_management_settings["physical_objects_system_settings"] + ) + response = self._otcs.import_physical_objects_settings(filename) + if not response: + success = False - logger.info( - "Workspace -> %s will be connected with external system -> %s (%s) with (type -> %s, id -> %s, search_field -> %s, search_value -> %s)", - name, - external_system_type, - ext_system_id, - bo_type, - bo_id, - bo_search_field, - bo_search_value, + if ( + "physical_objects_codes" in self._records_management_settings + and self._records_management_settings["physical_objects_codes"] != "" + ): + filename = ( + self._custom_settings_dir + + self._records_management_settings["physical_objects_codes"] ) + response = self._otcs.import_physical_objects_codes(filename) + if not response: + success = False - # For Salesforce we need to determine the actual business object ID (technical ID): - if external_system_type == "Salesforce" and not bo_id: - bo_id = self.get_salesforce_business_object( - workspace, - object_type=bo_type, - search_field=bo_search_field, - search_value=bo_search_value, - ) - if not bo_id: - continue + if ( + "physical_objects_locators" in self._records_management_settings + and self._records_management_settings["physical_objects_locators"] != "" + ): + filename = ( + self._custom_settings_dir + + self._records_management_settings["physical_objects_locators"] + ) + response = self._otcs.import_physical_objects_locators(filename) + if not response: + success = False - business_object["ext_system_id"] = ext_system_id - business_object["bo_type"] = bo_type - business_object["bo_id"] = bo_id + if ( + "security_clearance_codes" in self._records_management_settings + and self._records_management_settings["security_clearance_codes"] != "" + ): + filename = ( + self._custom_settings_dir + + self._records_management_settings["security_clearance_codes"] + ) + response = self._otcs.import_security_clearance_codes(filename) + if not response: + success = False - logger.info("Add external system -> %s to list.", bo_type) - business_object_list.append(business_object) + self.write_status_file(success, section_name, self._records_management_settings) - return business_object_list + return success - def process_workspaces(self, section_name: str = "workspaces") -> bool: - 
"""Process workspaces in payload and create them in Extended ECM. + # end method definition + + def process_holds(self, section_name: str = "holds") -> bool: + """Process Records Management Holds for Extended ECM users. Args: section_name (str, optional): name of the section. It can be overridden for cases where multiple sections of same type - are used (e.g. the "Post" sections like "workspacesPost") + are used (e.g. the "Post" sections) This name is also used for the "success" status files written to the Admin Personal Workspace - Returns: - bool: True if payload has been processed without errors, False otherwise - - Side Effects: - Set workspace["nodeId] to the node ID of the created workspace + Returns: + bool: True if payload has been processed without errors, False otherwise """ - if not self._workspaces: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + if not self._holds: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True # If this payload section has been processed successfully before we @@ -5901,429 +10677,205 @@ def process_workspaces(self, section_name: str = "workspaces") -> bool: success: bool = True - for workspace in self._workspaces: - # Read name from payload: - if not "name" in workspace: - logger.error("Workspace needs a name! Skipping to next workspace...") + for hold in self._holds: + if not "name" in hold: + logger.error("Cannot create Hold without a name! Skipping...") + continue + hold_name = hold["name"] + + if not "type" in hold: + logger.error( + "Cannot create Hold -> '%s' without a type! Skipping...", hold_name + ) success = False continue - name = workspace["name"] + hold_type = hold["type"] # Check if element has been disabled in payload (enabled = false). # In this case we skip the element: - if "enabled" in workspace and not workspace["enabled"]: + if "enabled" in hold and not hold["enabled"]: logger.info( - "Payload for Workspace -> %s is disabled. Skipping...", name + "Payload for Hold -> '%s' is disabled. Skipping...", hold_name ) continue - # Read Type Name from payload: - if not "type_name" in workspace: + hold_group = hold.get("group") + hold_comment = hold.get("comment", "") + hold_alternate_id = hold.get("alternate_id") + hold_date_applied = hold.get("date_applied") + hold_date_to_remove = hold.get("date_to_remove") + + # 550 is the RM Volume + response = self._otcs.get_node_by_volume_and_path(550, ["Hold Maintenance"]) + if not response: + logger.error("Cannot find Records Management Volume!") + continue + holds_maintenance_id = self._otcs.get_result_value(response, "id") + if not holds_maintenance_id: logger.error( - "Workspace -> %s needs a type name! Skipping to next workspace...", - name, + "Cannot find Holds Maintenance folder in Records Management Volume!" ) - success = False continue - type_name = workspace["type_name"] - # We need to do this early to find out if we have a cross-application workspace - # and need to continue even if the workspace does exist... - if "business_objects" in workspace and workspace["business_objects"]: - business_objects = workspace["business_objects"] - - business_object_list = self.prepare_workspace_business_objects( - workspace=workspace, business_objects=business_objects + if hold_group: + # Check if the Hold Group (folder) does already exist. 
+            # 550 is the RM Volume
+            response = self._otcs.get_node_by_volume_and_path(550, ["Hold Maintenance"])
+            if not response:
+                logger.error("Cannot find Records Management Volume!")
+                continue
+            holds_maintenance_id = self._otcs.get_result_value(response, "id")
+            if not holds_maintenance_id:
-                logger.error(
-                    "Workspace -> %s needs a type name! Skipping to next workspace...",
-                    name,
-                )
-                success = False
+                logger.error(
+                    "Cannot find Holds Maintenance folder in Records Management Volume!"
+                )
                continue
-            type_name = workspace["type_name"]
-            # We need to do this early to find out if we have a cross-application workspace
-            # and need to continue even if the workspace does exist...
-            if "business_objects" in workspace and workspace["business_objects"]:
-                business_objects = workspace["business_objects"]
-
-                business_object_list = self.prepare_workspace_business_objects(
-                    workspace=workspace, business_objects=business_objects
-                )
-                # Check if any of the external systems are avaiable:
-                if business_object_list:
-                    logger.info(
-                        "Workspace -> %s will be connected to -> %s business object(s).",
-                        name,
-                        str(len(business_object_list)),
-                    )
-                else:
-                    logger.info(
-                        "Workspace -> %s is not connected to any business object.", name
-                    )
-                    business_object_list = []
-
-            # Intialize cross-application workspace to "off":
-            ibo_workspace_id = None
+            if hold_group:
+                # Check if the Hold Group (folder) does already exist.
+                response = self._otcs.get_node_by_parent_and_name(
+                    holds_maintenance_id, hold_group
+                )
+                parent_id = self._otcs.get_result_value(response, "id")
+                if not parent_id:
+                    response = self._otcs.create_item(
+                        holds_maintenance_id, "833", hold_group
+                    )
+                    parent_id = self._otcs.get_result_value(response, "id")
+                    if not parent_id:
+                        logger.error("Failed to create hold group -> '%s'", hold_group)
+                        continue
+            else:
+                parent_id = holds_maintenance_id

-            # check if the workspace has been created before (effort to make the customizing code idem-potent)
-            logger.info(
-                "Check if workspace -> %s of type -> %s does already exist...",
-                name,
-                type_name,
-            )
-            # Check if workspace does already exist
-            # In case the workspace exists, determine_workspace_id()
-            # also stores the node ID into workspace["nodeId"]
-            workspace_id = self.determine_workspace_id(workspace)
-            if workspace_id:
-                logger.info(
-                    "Workspace -> %s of type -> %s does already exist and has ID -> %s!",
-                    name,
-                    type_name,
-                    workspace_id,
-                )
-                # Check if we have an existing workspace that is cross-application.
-                # In this case we cannot just continue.
-                if len(business_object_list) > 1:
-                    ibo_workspace_id = workspace_id
-                    logger.info(
-                        "This is a cross-application workspace so we cannot skip the creation..."
-                    )
-                    # We assume the workspace is already conntected to the first BO in the list
-                    # This is a simplifiying assumption and should be enahcned in the future.
-                    business_object_list.pop(0)
-                else:
-                    logger.info(
-                        "This workspace is NOT a cross-application workspace so we can skip the creation..."
-                    )
-                    continue
+            # Holds are special - they have folders that cannot be traversed
+            # in the normal way - we need to get the whole list of holds and use
+            # special parameters for the exist_result_item() method as the REST
+            # API call delivers a results->data->holds structure (not properties)
+            response = self._otcs.get_records_management_holds()
+            if self._otcs.exist_result_item(
+                response, "HoldName", hold_name, property_name="holds"
+            ):
+                logger.info("Hold -> '%s' does already exist. Skipping...", hold_name)
+                continue

-            logger.info(
-                "Creating new Workspace -> %s; Workspace Type -> %s...", name, type_name
-            )
+            hold = self._otcs.create_records_management_hold(
+                hold_type=hold_type,
+                name=hold_name,
+                comment=hold_comment,
+                alternate_id=hold_alternate_id,
+                parent_id=int(parent_id),
+                date_applied=hold_date_applied,
+                date_to_remove=hold_date_to_remove,
+            )

-            # Read optional description from payload:
-            if not "description" in workspace:
-                description = ""
-            else:
-                description = workspace["description"]
-
-            # Parent ID is optional and only required if workspace type does not specify a create location. 
- # This is typically the case if it is a nested workspace or workspaces of the same type can be created - # in different locations in the Enterprise Workspace: - parent_id = workspace["parent_id"] if workspace.get("parent_id") else None - - if parent_id is not None: - parent_workspace = next( - (item for item in self._workspaces if item["id"] == parent_id), None - ) - if parent_workspace is None: - logger.error( - "Parent Workspace with logical ID -> %s not found.", parent_id - ) - success = False - continue - - parent_workspace_node_id = self.determine_workspace_id(parent_workspace) - if not parent_workspace_node_id: - logger.warning( - "Parent Workspace without node ID (parent workspace creation may have failed). Skipping to next workspace..." - ) - success = False - continue - + if hold and hold["holdID"]: logger.info( - "Parent Workspace with logical ID -> %s has node ID -> %s", - parent_id, - parent_workspace_node_id, + "Successfully created hold -> '%s' with ID -> %s", + hold_name, + hold["holdID"], ) else: - # if no parent_id is specified the workspace location is determined by the workspace type definition - # and we pass None as parent ID to the get_workspace_create_form and create_workspace methods below: - parent_workspace_node_id = None - - # Find the workspace type with the name given in the payload: - workspace_type = next( - (item for item in self._workspace_types if item["name"] == type_name), - None, - ) - if workspace_type is None: - logger.error( - "Workspace Type -> %s not found. Skipping to next workspace...", - type_name, - ) success = False - continue - if workspace_type["templates"] == []: - logger.error( - "Workspace Type -> %s does not have templates. Skipping to next workspace...", - type_name, - ) - success = False - continue - # check if the template to be used is specified in the payload: - if "template_name" in workspace: - template_name = workspace["template_name"] - workspace_template = next( - ( - item - for item in workspace_type["templates"] - if item["name"] == template_name - ), - None, - ) - if workspace_template: # does this template exist? 
-                logger.info(
-                    "Workspace Template -> %s has been specified in payload and it does exist.",
-                    template_name,
-                )
-            else:
-                logger.error(
-                    "Workspace Template -> %s has been specified in payload but it doesn't exist!",
-                    template_name,
-                )
-                logger.error(
-                    "Workspace Type -> %s has only these templates -> %s",
-                    type_name,
-                    workspace_type["templates"],
-                )
-                success = False
-                continue
-            # template to be used is NOT specified in the payload - then we just take the first one:
-            else:
-                workspace_template = workspace_type["templates"][0]
-                logger.info(
-                    "Workspace Template has not been specified in payload - we just take the first one (%s)",
-                    workspace_template,
-                )
+        self.write_status_file(success, section_name, self._holds)

-            template_id = workspace_template["id"]
-            template_name = workspace_template["name"]
-            workspace_type_id = workspace_type["id"]
+        return success

-            logger.info(
-                "Create Workspace -> %s (type -> %s) from workspace template -> %s (ID -> %s)",
-                name,
-                type_name,
-                template_name,
-                template_id,
-            )
+    # end method definition

-            # Handle the case where the workspace is not connected
-            # to any external system / business object:
-            if not business_object_list:
-                business_object_list.append(
-                    {
-                        "ext_system_id": None,
-                        "bo_type": None,
-                        "bo_id": None,
-                    }
-                )
+    def process_additional_group_members(
+        self, section_name: str = "additionalGroupMemberships"
+    ) -> bool:
+        """Process additional group memberships we want to have in OTDS.

-            for business_object in business_object_list:
-                # Read categories from payload:
-                if not "categories" in workspace:
-                    logger.info(
-                        "Workspace payload has no category data! Will leave category attributes empty..."
-                    )
-                    category_create_data = {}
-                else:
-                    categories = workspace["categories"]
-                    category_create_data = self.prepare_workspace_create_form(
-                        categories=categories,
-                        template_id=template_id,
-                        ext_system_id=business_object["ext_system_id"],
-                        bo_type=business_object["bo_type"],
-                        bo_id=business_object["bo_id"],
-                        parent_workspace_node_id=parent_workspace_node_id,
-                    )

         Args:
+            section_name (str, optional): name of the section. It can be overridden
                                           for cases where multiple sections of same type
                                           are used (e.g. the "Post" sections)
                                           This name is also used for the "success" status
                                           files written to the Admin Personal Workspace
+        Returns:
+            bool: True if payload has been processed without errors, False otherwise
+        """

-                if ibo_workspace_id:
-                    logger.info(
-                        "Connect existing workspace to an additional business object (IBO)"
-                    )
-                # Create the workspace with all provided information:
-                response = self._otcs.create_workspace(
-                    workspace_template_id=template_id,
-                    workspace_name=name,
-                    workspace_description=description,
-                    workspace_type=workspace_type_id,
-                    category_data=category_create_data,
-                    external_system_id=business_object["ext_system_id"],
-                    bo_type=business_object["bo_type"],
-                    bo_id=business_object["bo_id"],
-                    parent_id=parent_workspace_node_id,
-                    ibo_workspace_id=ibo_workspace_id,
-                    show_error=(
-                        not self._sap
-                    ),  # if SAP is active it may produce workspaces concurrently (race condition). Then we don't want to issue errors.
-                )
-                if response is None:
-                    # Check if workspace has been concurrently created by some other
-                    # process (e.g. via SAP or Salesforce). This would be a race condition
-                    # that seems to really occur. 
-                    workspace_id = self.determine_workspace_id(workspace)
-                    if workspace_id:
-                        logger.info(
-                            "Workspace -> %s of type -> %s has been created by an external process and has ID -> %s!",
-                            name,
-                            type_name,
-                            workspace_id,
-                        )
-                    else:
-                        logger.error(
-                            "Failed to create workspace -> %s of type -> %s!",
-                            name,
-                            type_name,
-                        )
-                        success = False
-                        continue
-                else:
-                    # Now we add the node ID of the new workspace to the payload data structure
-                    # This will be reused when creating the workspace relationships!
-                    if not ibo_workspace_id:
-                        workspace["nodeId"] = self._otcs.get_result_value(
-                            response, "id"
-                        )
-                        ibo_workspace_id = workspace["nodeId"]
+        if not self._additional_group_members:
+            logger.info("Payload section -> '%s' is empty. Skipping...", section_name)
+            return True

-                    # We also get the name the workspace was finally created with.
-                    # This can be different form the name in the payload as additional
-                    # naming conventions from the Workspace Type definitions may apply.
-                    # This is important to make the python container idem-potent.
-                    response = self._otcs.get_workspace(workspace["nodeId"])
-                    workspace["name"] = self._otcs.get_result_value(
-                        response, "name"
-                    )
+        # If this payload section has been processed successfully before we
+        # can return True and skip processing it once more:
+        if self.check_status_file(section_name):
+            return True

-                    logger.info(
-                        "Successfully created workspace with final name -> %s and node ID -> %s",
-                        workspace["name"],
-                        workspace["nodeId"],
-                    )
+        success: bool = True
+
+        for additional_group_member in self._additional_group_members:
+            if not "parent_group" in additional_group_member:
+                logger.error("Missing parent_group! Skipping...")
+                continue
+            parent_group = additional_group_member["parent_group"]

-            # Check if there's an workspace nickname configured:
-            if "nickname" in workspace:
-                nickname = workspace["nickname"]
-                logger.info(
-                    "Assign nickname %s to workspace -> %s (%s)...",
-                    nickname,
-                    name,
-                    workspace["nodeId"],
-                )
-                response = self._otcs.set_node_nickname(
-                    node_id=workspace["nodeId"], nickname=nickname, show_error=True
-                )
-                if not response:
-                    logger.error(
-                        "Failed to assign nickname -> %s to workspace -> %s",
-                        nickname,
-                        name,
-                    )
-
-            # Check if there's an workspace icon/image configured:
-            if "image_nickname" in workspace:
-                image_nickname = workspace["image_nickname"]
+            if (
+                "enabled" in additional_group_member
+                and not additional_group_member["enabled"]
+            ):
+                logger.info(
+                    "Payload for Additional Group Member with Parent Group -> '%s' is disabled. Skipping...",
+                    parent_group,
+                )
+                continue
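+            # Illustrative sketches of additionalGroupMemberships payload
+            # elements (names are made up): each element combines
+            # "parent_group" with either "group_name" or "user_name":
+            #   {"parent_group": "Sales", "group_name": "Sales EMEA"}
+            #   {"parent_group": "Sales", "user_name": "pfranklin", "enabled": True}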
-                response = self._otcs.get_node_from_nickname(image_nickname)
-                node_id = self._otcs.get_result_value(response, "id")
-                if node_id:
-                    mime_type = self._otcs.get_result_value(response, "mime_type")
-                    if not mime_type:
-                        logger.warning(
-                            "Missing mime type information - assuming image/png"
-                        )
-                        mime_type = "image/png"
-                    file_path = "/tmp/" + image_nickname
-                    self._otcs.download_document(node_id=node_id, file_path=file_path)
-                    response = self._otcs.update_workspace_icon(
-                        workspace_id=workspace["nodeId"],
-                        file_path=file_path,
-                        file_mimetype=mime_type,
-                    )
-                    if not response:
-                        logger.error(
-                            "Failed to assign icon -> %s to workspace -> %s from file -> %s ",
-                            image_nickname,
-                            name,
-                            file_path,
-                        )
-                else:
-                    logger.error(
-                        "Cannot find workspace image with nickname -> %s for workspace -> %s",
-                        image_nickname,
-                        name,
-                    )
+            if (not "user_name" in additional_group_member) and (
+                not "group_name" in additional_group_member
+            ):
+                logger.error(
+                    "Either group_name or user_name needs to be specified! Skipping..."
+                )
+                success = False
+                continue
+
+            if "group_name" in additional_group_member:
+                group_name = additional_group_member["group_name"]
+                logger.info(
+                    "Adding group -> '%s' to parent group -> '%s' in OTDS.",
+                    group_name,
+                    parent_group,
+                )
+                response = self._otds.add_group_to_parent_group(
+                    group_name, parent_group
+                )
+                if not response:
+                    logger.error(
+                        "Failed to add group -> '%s' to parent group -> '%s' in OTDS.",
+                        group_name,
+                        parent_group,
+                    )
+                    success = False
+            elif "user_name" in additional_group_member:
+                user_name = additional_group_member["user_name"]
+                logger.info(
+                    "Adding user -> '%s' to group -> '%s' in OTDS.",
+                    user_name,
+                    parent_group,
+                )
+                response = self._otds.add_user_to_group(user_name, parent_group)
+                if not response:
+                    logger.error(
+                        "Failed to add user -> '%s' to group -> '%s' in OTDS.",
+                        user_name,
+                        parent_group,
+                    )
+                    success = False

-            # Check if an RM classification is specified for the workspace:
-            # RM Classification is specified as list of path elements (top-down)
-            if (
-                "rm_classification_path" in workspace
-                and workspace["rm_classification_path"] != []
-            ):
-                rm_class_node = self._otcs.get_node_by_volume_and_path(
-                    198, workspace["rm_classification_path"]
-                )
-                rm_class_node_id = self._otcs.get_result_value(rm_class_node, "id")
-                if rm_class_node_id:
-                    response = self._otcs.assign_rm_classification(
-                        workspace["nodeId"], rm_class_node_id, False
-                    )
-                    if response is None:
-                        logger.error(
-                            "Failed to assign RM classification -> %s (%s) to workspace -> %s",
-                            workspace["rm_classification_path"][-1],
-                            rm_class_node_id,
-                            name,
-                        )
-                    else:
-                        logger.info(
-                            "Assigned RM Classification -> %s to workspace -> %s",
-                            workspace["rm_classification_path"][-1],
-                            name,
-                        )
-            # Check if one or multiple classifications are specified for the workspace
-            # Classifications are specified as list of path elements (top-down)
-            if (
-                "classification_pathes" in workspace
-                and workspace["classification_pathes"] != []
-            ):
-                for classification_path in workspace["classification_pathes"]:
-                    class_node = self._otcs.get_node_by_volume_and_path(
-                        198, classification_path
-                    )
-                    class_node_id = self._otcs.get_result_value(class_node, "id")
-                    if class_node_id:
-                        response = self._otcs.assign_classification(
-                            workspace["nodeId"], [class_node_id], False
-                        )
-                        if response is None:
-                            logger.error(
-                                "Failed to assign classification -> %s to workspace -> %s",
-                                class_node_id,
-                                name,
-                            )
-                        else:
-                            logger.info(
-                                "Assigned Classification -> %s to workspace -> %s",
-                                classification_path[-1],
-                                name,
-                            )

-        self.write_status_file(success, section_name, self._workspaces)
+        self.write_status_file(success, section_name, self._additional_group_members)

        return success

-    # end method definition
+    # end method definition

-    def process_workspace_relationships(
-        self, section_name: str = "workspaceRelationships"
+    def process_additional_access_role_members(
+        self, section_name: str = "additionalAccessRoleMemberships"
    ) -> bool:
-        """Process workspaces relationships in payload and create them in Extended ECM.
-
-        Relationships can only be created if all workspaces have been created before. 
-        Once a workspace got created, the node ID of that workspaces has been added
-        to the payload["workspaces"] data structure (see process_workspaces())
-        Relationships are created between the node IDs of two business workspaces
-        (and not the logical IDs in the inital payload specification)
+        """Process additional access role memberships we want to have in OTDS.

         Args:
-            section_name (str, optional): name of the section.
+            section_name (str, optional): name of the section. It can be overridden
+                                          for cases where multiple sections of same type
+                                          are used (e.g. the "Post" sections)
                                           This name is also used for the "success" status
                                           files written to the Admin Personal Workspace
         Returns:
             bool: True if payload has been processed without errors, False otherwise
        """

-        if not self._workspaces:
-            logger.info("Payload section -> %s is empty. Skipping...", section_name)
+        if not self._additional_access_role_members:
+            logger.info("Payload section -> '%s' is empty. Skipping...", section_name)
            return True

        # If this payload section has been processed successfully before we
@@ -6333,129 +10885,90 @@

        success: bool = True

-        for workspace in self._workspaces:
-            # Read name from payload:
-            if not "name" in workspace:
-                continue
-            name = workspace["name"]
-
-            # Check if element has been disabled in payload (enabled = false).
-            # In this case we skip the element:
-            if "enabled" in workspace and not workspace["enabled"]:
-                logger.info(
-                    "Payload for Workspace -> %s is disabled. Skipping...", name
-                )
-                continue
+        for additional_access_role_member in self._additional_access_role_members:
+            if not "access_role" in additional_access_role_member:
+                logger.error("Missing access_role! Skipping...")
+                continue
+            access_role = additional_access_role_member["access_role"]

-            # Read relationships from payload:
-            if not "relationships" in workspace:
-                logger.info(
-                    "Workspace -> %s has no relationships. Skipping to next workspace...",
-                    name,
-                )
-                continue
-
-            # Check that workspaces actually have a logical ID -
-            # otherwise we cannot establish the relationship:
-            if not "id" in workspace:
-                logger.warning(
-                    "Workspace without ID cannot have a relationship. Skipping to next workspace..."
-                )
-                continue
+            if (
+                "enabled" in additional_access_role_member
+                and not additional_access_role_member["enabled"]
+            ):
+                logger.info(
+                    "Payload for Additional Member for AccessRole -> '%s' is disabled. Skipping...",
+                    access_role,
+                )
+                continue

-            workspace_id = workspace["id"]
-            logger.info("Workspace -> %s has relationships - creating...", name)
-
-            workspace_node_id = self.determine_workspace_id(workspace)
-            if not workspace_node_id:
-                logger.warning(
-                    "Workspace without node ID cannot have a relationship (workspace creation may have failed). Skipping to next workspace..."
-                )
-                continue
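+            # Illustrative sketches of additionalAccessRoleMemberships payload
+            # elements (names are made up): each element combines "access_role"
+            # with exactly one of "group_name", "user_name", or "partition_name":
+            #   {"access_role": "Access to Content Server", "group_name": "Sales"}
+            #   {"access_role": "Access to Content Server", "partition_name": "salesforce"}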
+            if (
+                (not "user_name" in additional_access_role_member)
+                and (not "group_name" in additional_access_role_member)
+                and (not "partition_name" in additional_access_role_member)
+            ):
+                logger.error(
+                    "Either group_name, user_name, or partition_name needs to be specified! Skipping..."
+                )
+                success = False
+                continue

-            # now determine the actual node IDs of the workspaces (have been created above):
-            logger.info(
-                "Workspace with logical ID -> %s has node ID -> %s",
-                workspace_id,
-                workspace_node_id,
-            )

-            for related_workspace_id in workspace["relationships"]:
-                # Find the workspace type with the name given in the payload:
-                related_workspace = next(
-                    (
-                        item
-                        for item in self._workspaces
-                        if item["id"] == related_workspace_id
-                    ),
-                    None,
-                )
-                if related_workspace is None:
-                    logger.error(
-                        "Related Workspace with logical ID -> %s not found.",
-                        related_workspace_id,
-                    )
-                    success = False
-                    continue
-
-                if "enabled" in related_workspace and not related_workspace["enabled"]:
-                    logger.info(
-                        "Payload for Related Workspace -> %s is disabled. Skipping...",
-                        related_workspace["name"],
-                    )
-                    continue
-
-                related_workspace_node_id = self.determine_workspace_id(
-                    related_workspace
-                )
-                if not related_workspace_node_id:
-                    logger.warning(
-                        "Related Workspace without node ID (workspaces creation may have failed). Skipping to next workspace..."
-                    )
-                    continue
-
+            if "group_name" in additional_access_role_member:
+                group_name = additional_access_role_member["group_name"]
                logger.info(
-                    "Related Workspace with logical ID -> %s has node ID -> %s",
-                    related_workspace_id,
-                    related_workspace_node_id,
+                    "Adding group -> '%s' to access role -> '%s' in OTDS.",
+                    group_name,
+                    access_role,
                )
-
+                response = self._otds.add_group_to_access_role(access_role, group_name)
+                if not response:
+                    logger.error(
+                        "Failed to add group -> '%s' to access role -> '%s' in OTDS.",
+                        group_name,
+                        access_role,
+                    )
+                    success = False
+            elif "user_name" in additional_access_role_member:
+                user_name = additional_access_role_member["user_name"]
                logger.info(
-                    "Create Workspace Relationship between workspace node ID -> %s and workspace node ID -> %s",
-                    workspace_node_id,
-                    related_workspace_node_id,
-                )
-
-                # Check if relationship does already exists:
-                response = self._otcs.get_workspace_relationships(workspace_node_id)
-
-                existing_workspace_relationship = self._otcs.exist_result_item(
-                    response, "id", related_workspace_node_id
-                )
-                if existing_workspace_relationship:
-                    logger.info(
-                        "Workspace relationship between workspace ID -> %s and related workspace ID -> %s does already exist. Skipping...",
+                    "Adding user -> '%s' to access role -> '%s' in OTDS.",
+                    user_name,
+                    access_role,
+                )
+                response = self._otds.add_user_to_access_role(access_role, user_name)
+                if not response:
+                    logger.error(
+                        "Failed to add user -> '%s' to access role -> '%s' in OTDS.",
+                        user_name,
+                        access_role,
+                    )
+                    success = False
+            elif "partition_name" in additional_access_role_member:
+                partition_name = additional_access_role_member["partition_name"]
+                logger.info(
+                    "Adding partition -> '%s' to access role -> '%s' in OTDS.",
+                    partition_name,
+                    access_role,
+                )
Skipping...", - workspace_node_id, - related_workspace_node_id, - ) - continue - - response = self._otcs.create_workspace_relationship( - workspace_node_id, related_workspace_node_id + response = self._otds.add_partition_to_access_role( + access_role, partition_name ) if not response: - logger.error("Failed to create workspace relationship.") + logger.error( + "Failed to add partition -> '%s' to access role -> '%s' in OTDS.", + partition_name, + access_role, + ) success = False - else: - logger.info("Successfully created workspace relationship.") - self.write_status_file(success, section_name, self._workspaces) + self.write_status_file( + success, section_name, self._additional_access_role_members + ) return success - # end method definition + # end method definition - def process_workspace_members(self, section_name: str = "workspaceMembers") -> bool: - """Process workspaces members in payload and create them in Extended ECM. + def process_renamings(self, section_name: str = "renamings") -> bool: + """Process renamings specified in payload and rename existing Extended ECM items. Args: section_name (str, optional): name of the section. It can be overridden @@ -6467,8 +10980,8 @@ def process_workspace_members(self, section_name: str = "workspaceMembers") -> b bool: True if payload has been processed without errors, False otherwise """ - if not self._workspaces: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + if not self._renamings: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True # If this payload section has been processed successfully before we @@ -6478,277 +10991,262 @@ def process_workspace_members(self, section_name: str = "workspaceMembers") -> b success: bool = True - for workspace in self._workspaces: - # Read name from payload (just for logging): - if not "name" in workspace: + for renaming in self._renamings: + if not "name" in renaming: + logger.error("Renamings require the new name!") continue - workspace_name = workspace["name"] + if not "nodeid" in renaming: + if not "volume" in renaming: + logger.error( + "Renamings require either a node ID or a volume! Skipping to next renaming..." + ) + continue + # Determine object ID of volume: + volume = self._otcs.get_volume(renaming["volume"]) + node_id = self._otcs.get_result_value(volume, "id") + else: + node_id = renaming["nodeid"] # Check if element has been disabled in payload (enabled = false). # In this case we skip the element: - if "enabled" in workspace and not workspace["enabled"]: - logger.info( - "Payload for Workspace -> %s is disabled. Skipping...", - workspace_name, - ) + if "enabled" in renaming and not renaming["enabled"]: + logger.info("Payload for Renaming is disabled. Skipping...") continue - # Read members from payload: - if not "members" in workspace: - logger.info( - "Workspace -> %s has no members in payload. 
Skipping to next workspace...", - workspace_name, + response = self._otcs.rename_node( + int(node_id), renaming["name"], renaming.get("description", "") + ) + if not response: + logger.error( + "Failed to rename node ID -> '%s' to new name -> '%s'.", + node_id, + renaming["name"], ) - continue - members = workspace["members"] + success = False - workspace_id = workspace["id"] - logger.info( - "Workspace -> %s has memberships in payload - establishing...", - workspace_name, - ) + self.write_status_file(success, section_name, self._renamings) - workspace_node_id = int(self.determine_workspace_id(workspace)) - if not workspace_node_id: - logger.warning( - "Workspace without node ID cannot have members (workspaces creation may have failed). Skipping to next workspace..." - ) + return success + + # end method definition + + def process_items(self, items: list, section_name: str = "items") -> bool: + """Process items specified in payload and create them in Extended ECM. + + Args: + items (list): list of items to create (need this as parameter as we + have multiple lists) + section_name (str, optional): name of the section. It can be overridden + for cases where multiple sections of same type + are used (e.g. the "Post" sections like "itemsPost") + This name is also used for the "success" status + files written to the Admin Personal Workspace + Returns: + bool: True if payload has been processed without errors, False otherwise + """ + + if not items: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) + + return True + + # If this payload section has been processed successfully before we + # can return True and skip processing it once more: + if self.check_status_file(section_name): + return True + + success: bool = True + + for item in items: + if not "name" in item: + logger.error("Item needs a name. Skipping...") continue + item_name = item["name"] - # now determine the actual node IDs of the workspaces (have been created by process_workspaces()): - workspace_node = self._otcs.get_node(workspace_node_id) - workspace_owner_id = self._otcs.get_result_value( - workspace_node, "owner_user_id" - ) - workspace_owner_name = self._otcs.get_result_value(workspace_node, "owner") - - workspace_roles = self._otcs.get_workspace_roles(workspace_node_id) - if workspace_roles is None: + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in item and not item["enabled"]: logger.info( - "Workspace with ID -> %s and node Id -> %s has no roles. Skipping to next workspace...", - workspace_id, - workspace_node_id, + "Payload for Item -> '%s' is disabled. 
Skipping...", item_name ) continue - # We don't want the workspace creator to be in the leader role - # of automatically created workspaces - this can happen because the - # creator gets added to the leader role automatically: - leader_role_id = self._otcs.lookup_result_value( - workspace_roles, "leader", True, "id" - ) - - if leader_role_id: - leader_role_name = self._otcs.lookup_result_value( - workspace_roles, "leader", True, "name" - ) - response = self._otcs.remove_member_from_workspace( - workspace_node_id, leader_role_id, workspace_owner_id, False - ) - if response: - logger.info( - "Removed creator user -> %s (%s) from leader role -> {%s (%s) of workspace -> %s", - workspace_owner_name, - workspace_owner_id, - leader_role_name, - leader_role_id, - workspace_name, - ) - - logger.info( - "Adding members to workspace with ID -> %s and node ID -> %s defined in payload...", - workspace_id, - workspace_node_id, - ) + if not "description" in item: + item_description = "" + else: + item_description = item["description"] - for member in members: - # read user list and role name from payload: - member_users = ( - member["users"] if member.get("users") else [] - ) # be careful to avoid key errors as users are optional - member_groups = ( - member["groups"] if member.get("groups") else [] - ) # be careful to avoid key errors as groups are optional - member_role_name = member["role"] + parent_nickname = item.get("parent_nickname", None) + parent_path = item.get("parent_path", None) - if member_role_name == "": # role name is required + if parent_nickname: # parent nickname has preference over parent path + parent_node = self._otcs.get_node_from_nickname(parent_nickname) + parent_id = self._otcs.get_result_value(parent_node, "id") + # if not parent_node: + if not parent_id: logger.error( - "Members of workspace -> %s is missing the role name.", - workspace_name, + "Item -> '%s' has a parent nickname -> '%s' that does not exist. Skipping...", + item_name, + parent_nickname, ) success = False continue - if ( - member_users == [] and member_groups == [] - ): # we either need users or groups (or both) - logger.warning( - "Role -> %s of workspace -> %s does not have any members (no users nor groups).", - member_role_name, - workspace_name, + else: + # elif parent_path is not None: # use parent_path and Enterprise Volume + # there's a difference between [] and None! + parent_node = self._otcs.get_node_by_volume_and_path( + volume_type=141, path=parent_path, create_path=True + ) + parent_id = self._otcs.get_result_value(parent_node, "id") + if not parent_id: + # if not parent_node: + logger.error( + "Item -> '%s' has a parent path that does not exist. Skipping...", + item_name, ) + success = False continue + # else: + # logger.error( + # "Item -> '%s' has neither a parent nickname nor a parent path. Skipping...", + # item_name, + # ) + # success = False + # continue - role_id = self._otcs.lookup_result_value( - workspace_roles, "name", member_role_name, "id" + original_nickname = item.get("original_nickname") + original_path = item.get("original_path") + + if original_nickname: + original_node = self._otcs.get_node_from_nickname(original_nickname) + original_id = self._otcs.get_result_value(original_node, "id") + if not original_id: + # if not original_node: + logger.error( + "Item -> '%s' has a original nickname -> '%s' that does not exist. 
Skipping...", + item_name, + original_nickname, + ) + success = False + continue + elif original_path: + original_node = self._otcs.get_node_by_volume_and_path( + 141, original_path ) - if role_id is None: - # if member_role is None: + original_id = self._otcs.get_result_value(original_node, "id") + if not original_id: + # if not original_node: logger.error( - "Workspace -> %s does not have a role with name -> %s", - workspace_name, - member_role_name, + "Item -> '%s' has a original path that does not exist. Skipping...", + item_name, ) success = False continue - logger.info("Role -> %s has ID -> %s", member_role_name, role_id) + else: + original_id = 0 - # Process users as workspaces members: - for member_user in member_users: - # find member user in current payload: - member_user_id = next( - (item for item in self._users if item["name"] == member_user), - {}, - ) - if member_user_id: - user_id = member_user_id["id"] - else: - # If this didn't work, try to get the member user from OTCS. This covers - # cases where the user is system generated or part - # of a former payload processing (thus not in the current payload): - logger.info( - "Member -> %s not found in current payload - check if it exists in OTCS already...", - member_user, - ) - response = self._otcs.get_user(member_user) - user_id = self._otcs.lookup_result_value( - response, key="name", value=member_user, return_key="id" - ) - if not user_id: - logger.error( - "Cannot find member user with login -> %s. Skipping...", - member_user, - ) - continue + if not "type" in item: + logger.error("Item -> '%s' needs a type. Skipping...", item_name) + success = False + continue - # Add member if it does not yet exists - suppress warning - # message if user is already in role: - response = self._otcs.add_member_to_workspace( - workspace_node_id, int(role_id), user_id, False - ) - if response is None: + item_type = item.get("type") + item_url = item.get("url") + + # check that we have the required information + # for the given item type: + match item_type: + case 140: # URL + if item_url == "": logger.error( - "Failed to add user -> %s (%s) to role -> %s of workspace -> %s", - member_user, - user_id, - member_role_name, - workspace_name, - ) - success = False - else: - logger.info( - "Successfully added user -> %s (%s) to role -> %s of workspace -> %s", - member_user, # member_user_id["name"], - user_id, - member_role_name, - workspace_name, + "Item -> '%s' has type URL but the URL is not in the payload. Skipping...", + item_name, ) - - # Process groups as workspaces members: - for member_group in member_groups: - member_group_id = next( - (item for item in self._groups if item["name"] == member_group), - None, - ) - if member_group_id is None: - logger.error("Cannot find group with name -> %s", member_group) success = False continue - group_id = member_group_id["id"] - - response = self._otcs.add_member_to_workspace( - workspace_node_id, int(role_id), group_id - ) - if response is None: + case 1: # Shortcut + if original_id == 0: logger.error( - "Failed to add group -> %s (%s) to role -> %s of workspace -> %s", - member_group_id["name"], - group_id, - member_role_name, - workspace_name, + "Item -> '%s' has type Shortcut but the original item is not in the payload. 
Skipping...", + item_name, ) success = False - else: - logger.info( - "Successfully added group -> %s (%s) to role -> %s of workspace -> %s", - member_group_id["name"], - group_id, - member_role_name, - workspace_name, - ) - - # Optionally the payload may have a permission list for the role - # to change the default permission from the workspace template - # to something more specific: - member_permissions = member.get("permissions", []) - if member_permissions == []: - logger.info( - "No permission change for workspace -> %s and role -> %s.", - workspace_name, - member_role_name, - ) - continue + continue + # Check if an item with the same name does already exist. + # This can also be the case if the python container runs a 2nd time. + # For this reason we are also not issuing an error but just an info (False): + response = self._otcs.get_node_by_parent_and_name( + int(parent_id), item_name, show_error=False + ) + if self._otcs.get_result_value(response, "name") == item_name: logger.info( - "Update permissions of workspace -> %s (%s) and role -> %s to -> %s", - workspace_name, - str(workspace_node_id), - member_role_name, - str(member_permissions), - ) - response = self._otcs.assign_permission( - node_id=workspace_node_id, - assignee_type="custom", - assignee=role_id, - permissions=member_permissions, - apply_to=2, + "Item with name -> '%s' does already exist in parent folder with ID -> %s", + item_name, + parent_id, ) - if not response: - logger.error( - "Failed to update permissions of workspace -> %s (%s) and role -> %s to -> %s.", - workspace_name, - str(workspace_node_id), - member_role_name, - str(member_permissions), - ) - success = False + continue + response = self._otcs.create_item( + int(parent_id), + str(item_type), + item_name, + item_description, + item_url, + int(original_id), + ) + if not response: + logger.error("Failed to create item -> '%s'.", item_name) + success = False + else: + logger.info("Item -> '%s' has been created successfully.", item_name) - self.write_status_file(success, section_name, self._workspaces) + self.write_status_file(success, section_name, items) return success - # end method definition + # end method definition - def process_workspace_member_permissions( - self, section_name: str = "workspaceMemberPermissions" + def process_permissions( + self, permissions: list, section_name: str = "permissions" ) -> bool: - """Process workspaces members in payload and set their permissions. - We need this separate from process_workspace_members() with also - sets permissions (if in payload) as we add documents to workspaces with - content transports and these documents don't inherit role permissions - (this is a transport limitation) + """Process items specified in payload and upadate permissions. Args: + permissions (list): list of items to apply permissions to. + Each list item in the payload is a dict with this structure: + { + nodeid = "..." + volume = "..." + nickname = "..." + public_permissions = ["see", "see_content", ...] + owner_permissions = [] + owner_group_permissions = [] + groups = [ + { + name = "..." + permissions = [] + } + ] + users = [ + { + name = "..." + permissions = [] + } + ] + apply_to = 2 + } section_name (str, optional): name of the section. It can be overridden for cases where multiple sections of same type - are used (e.g. the "Post" sections) + are used (e.g. 
the "Post" sections like "permissionsPost") This name is also used for the "success" status files written to the Admin Personal Workspace + Returns: bool: True if payload has been processed without errors, False otherwise """ - if not self._workspaces: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + if not permissions: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True # If this payload section has been processed successfully before we @@ -6758,130 +11256,266 @@ def process_workspace_member_permissions( success: bool = True - for workspace in self._workspaces: - # Read name from payload (just for logging): - if not "name" in workspace: + for permission in permissions: + if ( + not "path" in permission + and not "volume" in permission + and not "nickname" in permission + ): + logger.error( + "Item to change permission is not specified (needs path, volume, or nickname). Skipping..." + ) + success = False continue - workspace_name = workspace["name"] # Check if element has been disabled in payload (enabled = false). # In this case we skip the element: - if "enabled" in workspace and not workspace["enabled"]: - logger.info( - "Payload for Workspace -> %s is disabled. Skipping...", - workspace_name, - ) + if "enabled" in permission and not permission["enabled"]: + logger.info("Payload for Permission is disabled. Skipping...") continue - # Read members from payload: - if not "members" in workspace: + node_id = 0 + + # Check if "volume" is in payload and not empty string + # we try to get the node ID from the volume type: + if "volume" in permission and permission["volume"]: + volume_type = permission["volume"] logger.info( - "Workspace -> %s has no members in payload. No need to update permissions. Skipping to next workspace...", - workspace_name, + "Found volume type -> '%s' in permission payload. Determine volume ID...", + volume_type, ) - continue - members = workspace["members"] + node = self._otcs.get_volume(volume_type) + node_id = self._otcs.get_result_value(node, "id") + if not node_id: + logger.error( + "Illegal volume -> '%s' in permission payload. Skipping...", + volume_type, + ) + success = False + continue + else: + # the following path block requires + # a value for the volume - if it is + # not specified we take the Enterprise Workspace (141): + volume_type = 141 - workspace_id = workspace["id"] - workspace_node_id = int(self.determine_workspace_id(workspace)) - if not workspace_node_id: - logger.warning( - "Workspace without node ID cannot cannot get permission changes (workspaces creation may have failed). Skipping to next workspace..." + # Check if "path" is in payload and not empty list + # (path can be combined with volume so we need to take volume into account): + if "path" in permission and permission["path"]: + path = permission["path"] + logger.info( + "Found path -> '%s' in permission payload. Determine node ID...", + path, ) - continue + node = self._otcs.get_node_by_volume_and_path(volume_type, path) + node_id = self._otcs.get_result_value(node, "id") + if not node_id: + logger.error("Path -> '%s' does not exist. Skipping...", path) + success = False + continue - workspace_roles = self._otcs.get_workspace_roles(workspace_node_id) - if workspace_roles is None: + # Check if "nickname" is in payload and not empty string: + if "nickname" in permission and permission["nickname"]: + nickname = permission["nickname"] logger.info( - "Workspace with ID -> %s and node Id -> %s has no roles to update permissions. 
Skipping to next workspace...", - workspace_id, - workspace_node_id, + "Found nickname -> '%s' in permission payload. Determine node ID...", + nickname, ) - continue - - for member in members: - # read user list and role name from payload: - member_users = ( - member["users"] if member.get("users") else [] - ) # be careful to avoid key errors as users are optional - member_groups = ( - member["groups"] if member.get("groups") else [] - ) # be careful to avoid key errors as groups are optional - member_role_name = member["role"] - - if member_role_name == "": # role name is required + node = self._otcs.get_node_from_nickname(nickname) + node_id = self._otcs.get_result_value(node, "id") + if not node_id: logger.error( - "Members of workspace -> %s is missing the role name.", - workspace_name, + "Nickname -> '%s' does not exist. Skipping...", nickname ) success = False continue - if ( - member_users == [] and member_groups == [] - ): # we either need users or groups (or both) - logger.warning( - "Role -> %s of workspace -> %s does not have any members (no users nor groups).", - member_role_name, - workspace_name, - ) - continue - role_id = self._otcs.lookup_result_value( - workspace_roles, "name", member_role_name, "id" + # Now we should have a value for node_id: + if not node_id: + logger.error("No node ID found! Skipping permission...") + success = False + continue + + node_name = self._otcs.get_result_value(node, "name") + logger.info( + "Found node -> '%s' with ID -> %s to apply permission to.", + node_name, + node_id, + ) + # write node information back into payload + # for better debugging + permission["node_name"] = node_name + permission["node_id"] = node_id + + if "apply_to" in permission: + apply_to = permission["apply_to"] + else: + apply_to = 2 # make item + sub-items the default + + # 1. Process Owner Permissions (list can be empty!) + if "owner_permissions" in permission: + owner_permissions = permission["owner_permissions"] + logger.info( + "Update owner permissions for item -> '%s' (%s) to -> %s", + node_name, + str(node_id), + str(owner_permissions), ) - if role_id is None: + response = self._otcs.assign_permission( + int(node_id), "owner", 0, owner_permissions, apply_to + ) + if not response: logger.error( - "Workspace -> %s does not have a role with name -> %s", - workspace_name, - member_role_name, + "Failed to update owner permissions for item -> '%s' (%s).", + node_name, + str(node_id), ) success = False - continue - logger.info("Role -> %s has ID -> %s", member_role_name, role_id) - member_permissions = member.get("permissions", []) - if member_permissions == []: - logger.info( - "No permission change for workspace -> %s and role -> %s.", - workspace_name, - member_role_name, + # 2. Process Owner Group Permissions + if "owner_group_permissions" in permission: + owner_group_permissions = permission["owner_group_permissions"] + logger.info( + "Update owner group permissions for item -> '%s' (%s) to -> %s", + node_name, + str(node_id), + str(owner_group_permissions), + ) + response = self._otcs.assign_permission( + int(node_id), "group", 0, owner_group_permissions, apply_to + ) + if not response: + logger.error( + "Failed to update group permissions for item -> '%s' (%s).", + node_name, + str(node_id), ) - continue + success = False
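For orientation between the numbered permission steps: the following is an illustrative sketch of a `permissions` payload element, combining the keys the added code reads (`path`/`volume`/`nickname`, `apply_to`, `owner_permissions`, `owner_group_permissions`, `public_permissions`, `users`, `groups`). All names, paths, and permission strings are hypothetical.

```python
# Illustrative "permissions" payload element; values are hypothetical,
# keys match what the permission-processing loop above reads:
permissions = [
    {
        "enabled": True,
        "volume": 141,  # optional; without a volume the code defaults to 141 (Enterprise Workspace)
        "path": ["Administration", "Reports"],  # hypothetical folder path inside the volume
        # "nickname": "ot_reports",             # alternative way to address the node
        "apply_to": 2,  # 2 = item + sub-items (the default in the code above)
        "owner_permissions": [],  # the list can be empty
        "owner_group_permissions": ["see", "see_contents"],  # hypothetical permission strings
        "public_permissions": ["see"],                       # hypothetical permission strings
        "users": [{"name": "pgrant", "permissions": ["see", "see_contents"]}],   # hypothetical user
        "groups": [{"name": "Finance", "permissions": ["see", "see_contents"]}],  # hypothetical group
    }
]
```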
+ # 3. Process Public Permissions + if "public_permissions" in permission: + public_permissions = permission["public_permissions"] logger.info( - "Update permissions of workspace -> %s (%s) and role -> %s to -> %s", - workspace_name, - str(workspace_node_id), - member_role_name, - str(member_permissions), + "Update public permissions for item -> '%s' (%s) to -> %s", + node_name, + str(node_id), + str(public_permissions), ) response = self._otcs.assign_permission( - node_id=workspace_node_id, - assignee_type="custom", - assignee=role_id, - permissions=member_permissions, - apply_to=2, + int(node_id), "public", 0, public_permissions, apply_to ) if not response: logger.error( - "Failed to update permissions of workspace -> %s (%s) and role -> %s to -> %s.", - workspace_name, - str(workspace_node_id), - member_role_name, - str(member_permissions), + "Failed to update public permissions for item -> '%s' (%s).", + node_name, + str(node_id), + ) + success = False + continue + + # 4. Process Assigned User Permissions (if specified and not empty) + if "users" in permission and permission["users"]: + users = permission["users"] + for user in users: + if not "name" in user or not "permissions" in user: + logger.error( + "Missing user name in user permission specification. Cannot set user permissions. Skipping..." + ) + success = False + continue + user_name = user["name"] + if not "permissions" in user: + logger.error( + "Missing permissions in user -> '%s' permission specification. Cannot set user permissions. Skipping...", + user_name, + ) + success = False + continue + user_permissions = user["permissions"] + response = self._otcs.get_user(name=user_name, show_error=True) + user_id = self._otcs.get_result_value(response=response, key="id") + if not user_id: + logger.error( + "Cannot find user with name -> '%s'; cannot set user permissions. Skipping user...", + user_name, + ) + success = False + continue + user["id"] = user_id # write ID back into payload + + logger.info( + "Update permission of user -> '%s' for item -> '%s' (%s) to -> %s", + user_name, + node_name, + str(node_id), + str(user_permissions), + ) + response = self._otcs.assign_permission( + int(node_id), "custom", user_id, user_permissions, apply_to + ) + if not response: + logger.error( + "Failed to update assigned user permissions for item -> %s.", + node_id, + ) + success = False + + # 5. Process Assigned Group Permissions (if specified and not empty) + if "groups" in permission and permission["groups"]: + groups = permission["groups"] + for group in groups: + if not "name" in group: + logger.error( + "Missing group name in group permission specification. Cannot set group permissions. Skipping..." + ) + success = False + continue + group_name = group["name"] + if not "permissions" in group: + logger.error( + "Missing permissions in group -> '%s' permission specification. Cannot set group permissions. Skipping...", + group_name, + ) + success = False + continue + group_permissions = group["permissions"] + logger.info( + "Update permissions of group -> '%s' for item -> '%s' (%s) to -> %s", + group_name, + node_name, + str(node_id), + str(group_permissions), ) - success = False + otcs_group = self._otcs.get_group(group_name, True) + group_id = self._otcs.get_result_value(otcs_group, "id") + if not group_id: + logger.error( + "Cannot find group with name -> '%s'; cannot set group permissions.
Skipping group...", + group_name, + ) + success = False + continue + group["id"] = group_id # write ID back into payload + response = self._otcs.assign_permission( + int(node_id), "custom", group_id, group_permissions, apply_to + ) + if not response: + logger.error( + "Failed to update assigned group permissions for item -> '%s' (%s).", + node_name, + str(node_id), + ) + success = False - self.write_status_file(success, section_name, self._workspaces) + self.write_status_file(success, section_name, permissions) return success - # end method definition + # end method definition - def process_workspace_aviators( - self, section_name: str = "workspaceAviators" - ) -> bool: - """Process workspaces Content Aviator settings in payload and enable Aviator for selected workspaces. + def process_assignments(self, section_name: str = "assignments") -> bool: + """Process assignments specified in payload and assign items (such as workspaces and + items with nicknames) to users or groups. Args: section_name (str, optional): name of the section. It can be overridden @@ -6893,8 +11527,8 @@ def process_workspace_aviators( bool: True if payload has been processed without errors, False otherwise """ - if not self._workspaces: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + if not self._assignments: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True # If this payload section has been processed successfully before we @@ -6904,233 +11538,187 @@ def process_workspace_aviators( success: bool = True - for workspace in self._workspaces: - # Read name from payload (just for logging): - if not "name" in workspace: - continue - workspace_name = workspace["name"] - - # Check if element has been disabled in payload (enabled = false). - # In this case we skip the element: - if "enabled" in workspace and not workspace["enabled"]: - logger.info( - "Payload for Workspace -> %s is disabled. Skipping...", - workspace_name, - ) - continue - - # Read Aviator setting from payload: - if not "enable_aviator" in workspace or not workspace["enable_aviator"]: - logger.info( - "Aviator is not enabled for Workspace -> %s. Skipping to next workspace...", - workspace_name, - ) - continue - - # We cannot just lookup with workspace.get("nodeId") as the customizer - # may have been restarted inbetween - so we use our proper determine_workspace_id - # here... - workspace_id = self.determine_workspace_id(workspace) - if not workspace_id: - logger.error( - "Cannot find node ID for workspace -> %s. Workspace creation may have failed. Skipping to next workspace...", - workspace_name, - ) - success = False - continue - - # Make code idem-potent and check if Aviator is already enabled - # for this workspace: - if self._otcs.check_workspace_aviator(workspace_id=workspace_id): - logger.info( - "Skip workspace -> %s (%s) as Aviator is already enabled...", - workspace_name, - workspace_id, - ) - continue - - # Now enable the Content Aviator for the workspace: - response = self._otcs.update_workspace_aviator(workspace_id, True) - if not response: - logger.error( - "Failed to enable Content Aviator for workspace -> %s (%s)", - workspace_name, - workspace_id, - ) + for assignment in self._assignments: + # Sanity check: we need a subject - it's mandatory: + if not "subject" in assignment: + logger.error("Assignment needs a subject! 
Skipping assignment...") success = False continue - - self.write_status_file(success, section_name, self._workspaces) - - return success - - # end method definition - - def process_web_reports( - self, web_reports: list, section_name: str = "webReports" - ) -> bool: - """Process web reports in payload and run them in Extended ECM. - - Args: - web_reports (list): list of web reports. As we have two different list (pre and post) - we need to pass the actual list as parameter. - section_name (str, optional): name of the section. It can be overridden - for cases where multiple sections of same type - are used (e.g. the "Post" sections like "webReportsPost") - This name is also used for the "success" status - files written to the Admin Personal Workspace - Returns: - bool: True if a restart of the OTCS pods is required. False otherwise. - """ - - if not web_reports: - logger.info("Payload section -> %s is empty. Skipping...", section_name) - return False # important to return False here as otherwise we are triggering a restart of services!! - - # If this payload section has been processed successfully before we - # can return False and skip processing it once more: - if self.check_status_file(section_name): - return False # important to return False here as otherwise we are triggering a restart of services!! - - restart_required: bool = False - success: bool = True - - for web_report in web_reports: - nick_name = web_report["nickname"] + subject = assignment["subject"] # Check if element has been disabled in payload (enabled = false). # In this case we skip the element: - if "enabled" in web_report and not web_report["enabled"]: + if "enabled" in assignment and not assignment["enabled"]: logger.info( - "Payload for Web Report -> %s is disabled. Skipping...", nick_name + "Payload for Assignment -> '%s' is disabled. Skipping...", subject ) continue - description = web_report["description"] - restart = web_report.get("restart", False) - - if not self._otcs.get_node_from_nickname(nick_name): + # instruction is optional but we give a warning if they are missing: + if not "instruction" in assignment: + logger.warning( + "Assignment -> '%s' should have an instruction!", subject + ) + instruction = "" + else: + instruction = assignment["instruction"] + # Sanity check: we either need users or groups (or both): + if not "groups" in assignment and not "users" in assignment: logger.error( - "Web Report with nickname -> %s does not exist! Skipping...", - nick_name, + "Assignment -> '%s' needs groups or users! Skipping assignment...", + subject, ) success = False continue - - # be careful to avoid key errors as Web Report parameters are optional: - actual_params = ( - web_report["parameters"] if web_report.get("parameters") else {} - ) - formal_params = self._otcs.get_web_report_parameters(nick_name) - if actual_params: - logger.info( - "Running Web Report -> %s (%s) with parameters -> %s ...", - nick_name, - description, - actual_params, + # Check if a workspace is specified for the assignment and check it does exist: + if "workspace" in assignment and assignment["workspace"]: + workspace = next( + ( + item + for item in self._workspaces + if item["id"] == assignment["workspace"] + ), + None, ) - # Do some sanity checks to see if the formal and actual parameters are matching... - # Check 1: are there formal parameters at all? - if not formal_params: + if not workspace: logger.error( - "Web Report -> %s is called with actual parameters but it does not expect parameters! 
Skipping...", - nick_name, + "Assignment -> '%s' has specified a not existing workspace -> %s! Skipping assignment...", + subject, + assignment["workspace"], ) success = False continue - lets_continue = False - # Check 2: Iterate through the actual parameters given in the payload - # and see if there's a matching formal parameter expected by the Web Report: - for key, value in actual_params.items(): - # Check if there's a matching formal parameter defined on the Web Report node: - formal_param = next( - (item for item in formal_params if item["parm_name"] == key), - None, + node_id = self.determine_workspace_id(workspace) + if not node_id: + logger.error( + "Assignment -> '%s' has specified a not existing workspace -> %s! Skipping assignment...", + subject, + assignment["workspace"], ) - if formal_param is None: - logger.error( - "Web Report -> %s is called with parameter -> %s that is not expected! Value: %s) Skipping...", - nick_name, - key, - value, - ) - success = False - lets_continue = True # we cannot do a "continue" here directly as we are in an inner loop - # Check 3: Iterate through the formal parameters and validate there's a matching - # actual parameter defined in the payload for each mandatory formal parameter - # that does not have a default value: - for formal_param in formal_params: - if ( - (formal_param["mandatory"] is True) - and (formal_param["default_value"] is None) - and not actual_params.get(formal_param["parm_name"]) - ): - logger.error( - "Web Report -> %s is called without mandatory parameter -> %s! Skipping...", - nick_name, - formal_param["parm_name"], - ) - success = False - lets_continue = True # we cannot do a "continue" here directly as we are in an inner loop - # Did any of the checks fail? - if lets_continue: + success = False + continue + # If we don't have a workspace then check if a nickname is specified for the assignment: + elif "nickname" in assignment: + response = self._otcs.get_node_from_nickname(assignment["nickname"]) + node_id = self._otcs.get_result_value(response, "id") + if not node_id: + # if response == None: + logger.error( + "Assignment item with nickname -> '%s' not found", + assignment["nickname"], + ) + success = False continue - # Actual parameters are validated, we can run the Web Report: - response = self._otcs.run_web_report(nick_name, actual_params) else: - logger.info( - "Running Web Report -> %s (%s) without parameters...", - nick_name, - description, + logger.error( + "Assignment -> '%s' needs a workspace or nickname! Skipping assignment...", + subject, ) - # Check if there's a formal parameter that is mandatory but - # does not have a default value: - if formal_params: - required_param = next( + success = False + continue + + assignees = [] + + if "groups" in assignment: + group_assignees = assignment["groups"] + for group_assignee in group_assignees: + # find the group in the group list + group = next( ( - item - for item in formal_params - if (item["mandatory"] is True) - and (not item["default_value"]) + item + for item in self._groups + if item["name"] == group_assignee ), None, ) - if required_param: + if not group: logger.error( - "Web Report -> %s is called without parameters but has a mandatory parameter -> %s without a default value! Skipping...", - nick_name, - required_param["parm_name"], + "Assignment group -> '%s' does not exist! Skipping group...", + group_assignee, ) success = False continue - else: # we are good to proceed! 
- logger.debug( - "Web Report -> %s does not have a mandatory parameter without a default value!", - nick_name, ) - response = self._otcs.run_web_report(nick_name) - if response is None: - logger.error("Failed to run web report -> %s", nick_name) + success = False + if not "id" in group: + logger.error( + "Assignment group -> '%s' does not have an ID. Skipping group...", + group_assignee, ) + success = False + continue + group_id = group["id"] + # add the group ID to the assignee list: + assignees.append(group_id) + + if "users" in assignment: + user_assignees = assignment["users"] + for user_assignee in user_assignees: + # find the user in the user list + user = next( + (item for item in self._users if item["name"] == user_assignee), + None, + ) + if not user: + logger.error( + "Assignment user -> '%s' does not exist! Skipping user...", + user_assignee, + ) + success = False + continue + if not "id" in user: + logger.error( + "Assignment user -> '%s' does not have an ID. Skipping user...", + user_assignee, + ) + success = False + continue + user_id = user["id"] + # add the user ID to the assignee list: + assignees.append(user_id) + + if not assignees: + logger.error( + "Cannot add assignment -> '%s' for node ID -> %s because no assignee was found.", + subject, + node_id, + ) success = False + continue - if restart: - restart_required = True + response = self._otcs.assign_item_to_user_group( + int(node_id), subject, instruction, assignees + ) + if not response: + logger.error( + "Failed to add assignment -> '%s' for node ID -> %s with assignees -> %s.", + subject, + node_id, + assignees, + ) + success = False - self.write_status_file(success, section_name, web_reports) + self.write_status_file(success, section_name, self._assignments) - return restart_required + return success - # end method definition + # end method definition - def process_cs_applications( - self, otcs_object: OTCS, section_name: str = "csApplications" + def process_user_licenses( + self, + resource_name: str, + license_feature: str, + license_name: str, + user_specific_payload_field: str = "licenses", + section_name: str = "userLicenses", ) -> bool: - """Process CS applications in payload and install them in Extended ECM. - The CS Applications need to be installed in all frontend and backends. + """Assign a specific OTDS license feature to all Extended ECM users. + This method is used for OTIV and Extended ECM licenses. Args: - otcs_object (object): this can either be the OTCS frontend or OTCS backend. If None - then the otcs_backend is used. + resource_name (str): name of the OTDS resource + license_feature (str): license feature to assign to the user (product specific) + license_name (str): name of the license key (e.g. "EXTENDED_ECM" or "INTELLIGENT_VIEWING") + user_specific_payload_field (str, optional): name of the user specific field in payload + (if empty it will be ignored) section_name (str, optional): name of the section. It can be overridden for cases where multiple sections of same type are used (e.g. the "Post" sections) @@ -7140,8 +11728,8 @@ def process_cs_applications( bool: True if payload has been processed without errors, False otherwise """ - if not self._cs_applications: - logger.info("Payload section -> %s is empty.
Skipping...", section_name) return True # If this payload section has been processed successfully before we @@ -7151,45 +11739,79 @@ def process_cs_applications( success: bool = True - # OTCS backend is the default: - if not otcs_object: - otcs_object = self._otcs_backend + otds_resource = self._otds.get_resource(resource_name) + if not otds_resource: + logger.error( + "OTDS Resource -> '%s' not found. Cannot assign licenses to users.", + resource_name, + ) + return False - for cs_application in self._cs_applications: - application_name = cs_application["name"] + user_partition = self._otcs.config()["partition"] + if not user_partition: + logger.error("OTCS user partition not found in OTDS!") + return False + + for user in self._users: + user_name = user["name"] # Check if element has been disabled in payload (enabled = false). # In this case we skip the element: - if "enabled" in cs_application and not cs_application["enabled"]: + if "enabled" in user and not user["enabled"]: logger.info( - "Payload for CS Application -> %s is disabled. Skipping...", - application_name, + "Payload for User -> '%s' is disabled. Skipping...", user_name ) continue - application_description = cs_application["description"] + if user_specific_payload_field and user_specific_payload_field in user: + logger.info( + "Found specific license feature -> %s for User -> '%s'. Overwriting default license feature -> %s", + user[user_specific_payload_field], + user_name, + license_feature, + ) + user_license_feature = user[user_specific_payload_field] + else: # use the default feature from the actual parameter + user_license_feature = [license_feature] - logger.info( - "Install CS Application -> %s (%s)...", - application_name, - application_description, - ) - response = otcs_object.install_cs_application(application_name) - if response is None: - logger.error( - "Failed to install CS Application -> %s!", application_name + for license_feature in user_license_feature: + if self._otds.is_user_licensed( + user_name=user_name, + resource_id=otds_resource["resourceID"], + license_feature=license_feature, + license_name=license_name, + ): + logger.info( + "User -> '%s' is already licensed for -> '%s' (%s)", + user_name, + license_name, + license_feature, + ) + continue + assigned_license = self._otds.assign_user_to_license( + user_partition, + user_name, # we want the plain login name here + otds_resource["resourceID"], + license_feature, + license_name, ) - success = False - self.write_status_file(success, section_name, self._cs_applications) + if not assigned_license: + logger.error( + "Failed to assign license feature -> '%s' to user -> %s!", + license_feature, + user_name, + ) + success = False + + self.write_status_file(success, section_name, self._users) return success - # end method definition + # end method definition - def process_user_settings(self, section_name: str = "userSettings") -> bool: - """Process user settings in payload and apply themin OTDS. - This includes password settings and user display settings. + def process_exec_pod_commands(self, section_name: str = "execPodCommands") -> bool: + """Process commands that should be executed in the Kubernetes pods. Args: section_name (str, optional): name of the section. It can be overridden @@ -7201,8 +11823,15 @@ def process_user_settings(self, section_name: str = "userSettings") -> bool: bool: True if payload has been processed without errors, False otherwise """ - if not self._users: - logger.info("Payload section -> %s is empty. 
Skipping...", section_name) + if not isinstance(self._k8s, K8s): + logger.error( + "K8s not setup properly -> Skipping payload section -> '%s'...", + section_name, + ) + return False + + if not self._exec_pod_commands: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True # If this payload section has been processed successfully before we @@ -7212,78 +11841,96 @@ def process_user_settings(self, section_name: str = "userSettings") -> bool: success: bool = True - for user in self._users: - user_name = user["name"] - - # Check if element has been disabled in payload (enabled = false). - # In this case we skip the element: - if "enabled" in user and not user["enabled"]: - logger.info( - "Payload for User -> %s is disabled. Skipping...", user_name + for exec_pod_command in self._exec_pod_commands: + if not "pod_name" in exec_pod_command: + logger.error( + "To execute a command in a pod the pod name needs to be specified in the payload! Skipping to next pod command..." ) + success = False continue + pod_name = exec_pod_command["pod_name"] - user_partition = self._otcs.config()["partition"] - if not user_partition: - logger.error("User partition not found!") + if not "command" in exec_pod_command or not exec_pod_command.get("command"): + logger.error( + "Pod command is not specified for pod -> %s! It needs to be a non-empty list! Skipping to next pod command...", + pod_name, + ) success = False continue + command = exec_pod_command["command"] - # Set the OTDS display name. Extended ECM does not use this but - # it makes AppWorks display users correctly (and it doesn't hurt) - # We only set this if firstname _and_ last name are in the payload: - if "firstname" in user and "lastname" in user: - user_display_name = user["firstname"] + " " + user["lastname"] - response = self._otds.update_user( - user_partition, user_name, "displayName", user_display_name + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in exec_pod_command and not exec_pod_command["enabled"]: + logger.info( + "Payload for Exec Pod Command in pod -> '%s' is disabled. 
Skipping...", + pod_name, ) - if response: - logger.info( - "Display name for user -> %s has been updated to -> %s", - user_name, - user_display_name, - ) - else: - logger.error( - "Display name for user -> %s could not be updated to -> %s", - user_name, - user_display_name, - ) - success = False + continue - # Don't enforce the user to reset password at first login (settings in OTDS): - logger.info("Don't enforce password change for user -> %s...", user_name) - response = self._otds.update_user( - user_partition, user_name, "UserMustChangePasswordAtNextSignIn", "False" - ) - if not response: - success = False + if not "description" in exec_pod_command: + logger.info("Executing command -> %s in pod -> '%s'", command, pod_name) - response = self._otds.update_user( - user_partition, user_name, "UserCannotChangePassword", "True" - ) - if not response: - success = False + else: + description = exec_pod_command["description"] + logger.info( + "Executing command -> %s in pod -> '%s' (%s)", + command, + pod_name, + description, + ) - # Set user password to never expire - response = self._otds.update_user( - user_partition, user_name, "PasswordNeverExpires", "True" - ) - if not response: + if ( + not "interactive" in exec_pod_command + or exec_pod_command["interactive"] is False + ): + result = self._k8s.exec_pod_command(pod_name, command) + else: + if not "timeout" in exec_pod_command: + result = self._k8s.exec_pod_command_interactive(pod_name, command) + else: + timeout = exec_pod_command["timeout"] + result = self._k8s.exec_pod_command_interactive( + pod_name, command, timeout + ) + + # we need to differentiate 3 cases here: + # 1. result = None is returned - this is an error (exception) + # 2. result is empty string - this is OK + # 3. result is a non-empty string - this is OK - print it to log + if result is None: + logger.error( + "Execution of command -> '%s' in pod -> '%s' failed", + command, + pod_name, + ) success = False + elif result != "": + logger.info( + "Execution of command -> '%s' in pod -> '%s' returned result -> %s", + command, + pod_name, + result, + ) + else: + # It is not an error if no result is returned. It depends on the nature of the command + # if a result is written to stdout or stderr. + logger.info( + "Execution of command -> '%s' in pod -> '%s' did not return a result", + command, + pod_name, + ) - self.write_status_file(success, section_name, self._users) + self.write_status_file(success, section_name, self._exec_pod_commands) return success - # end method definition + # end method definition - def process_user_favorites_and_profiles( - self, section_name: str = "userFavoritesAndProfiles" + def process_document_generators( + self, section_name: str = "documentGenerators" ) -> bool: - """Process user favorites in payload and create them in Extended ECM. - This method also simulates browsing the favorites to populate the - widgets on the landing pages and sets personal preferences. + """Generate documents for a defined workspace type based on template Args: section_name (str, optional): name of the section. It can be overridden @@ -7295,8 +11942,8 @@ def process_user_favorites_and_profiles( bool: True if payload has been processed without errors, False otherwise """ - if not self._users: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + if not self._doc_generators: + logger.info("Payload section -> '%s' is empty. 
Skipping...", section_name) return True # If this payload section has been processed successfully before we @@ -7306,402 +11953,648 @@ def process_user_favorites_and_profiles( success: bool = True - # We can only set favorites if we impersonate / authenticate as the user. - # The following code (for loop) will change the authenticated user - we need to - # switch it back to admin user later so we safe the admin credentials for this: - - if self._users: - # save admin credentials for later switch back to admin user: - admin_credentials = self._otcs.credentials() - else: - admin_credentials = {} + # save admin credentials for later switch back to admin user: + admin_credentials = self._otcs.credentials() + authenticated_user = "admin" - for user in self._users: - user_name = user["name"] + for doc_generator in self._doc_generators: + if not "workspace_type" in doc_generator: + logger.error( + "To generate documents for workspaces the workspace type needs to be specified in the payload! Skipping to next document generator..." + ) + success = False + continue + workspace_type = doc_generator["workspace_type"] # Check if element has been disabled in payload (enabled = false). # In this case we skip the element: - if "enabled" in user and not user["enabled"]: + if "enabled" in doc_generator and not doc_generator["enabled"]: logger.info( - "Payload for User -> %s is disabled. Skipping...", user_name + "Payload for document generator of workspace type -> '%s' is disabled. Skipping...", + workspace_type, ) continue - user_password = user["password"] - - # we change the otcs credentials to the user: - self._otcs.set_credentials(user_name, user_password) - - # we re-authenticate as the user: - logger.info("Authenticate user -> %s...", user_name) - # True = force new login with new user - cookie = self._otcs.authenticate(revalidate=True) - if not cookie: - logger.error("Couldn't authenticate user -> %s", user_name) + if not "template_path" in doc_generator: + logger.error( + "To generate documents for workspaces of type -> '%s' the path to the document template needs to be specified in the payload! Skipping to next document generator...", + workspace_type, + ) + success = False + continue + template_path = doc_generator["template_path"] + # 20541 is the ID of the Document Template Volume which + # (we hope) is stable: + template = self._otcs.get_node_by_volume_and_path(20541, template_path) + if not template: + logger.error( + "Cannot find document template in path -> %s. Skipping to next document generator...", + template_path, + ) success = False continue + template_id = self._otcs.get_result_value(template, "id") + template_name = self._otcs.get_result_value(template, "name") - # we update the user profile to activate responsive (dynamic) containers: - response = self._otcs.update_user_profile( - field="responsiveContainerMode", - value=True, - config_section="SmartUI", + if not "classification_path" in doc_generator: + logger.error( + "To generate documents for workspaces of type -> '%s' the path to the document classification needs to be specified in the payload! 
Skipping to next document generator...", + workspace_type, + ) + success = False + continue + classification_path = doc_generator["classification_path"] + classification = self._otcs.get_node_by_volume_and_path( + 198, classification_path ) - if response is None: - logger.warning( - "Profile for user -> %s couldn't be updated with responsive container mode'!", - user_name, + if not classification: + logger.error( + "Cannot find document classification in path -> %s. Skipping to next document generator...", + classification_path, + ) + success = False + continue + classification_id = self._otcs.get_result_value(classification, "id") + + # "category_name" is optional. But if it is specified + # then also "attributes" needs to be specified: + if not "category_name" in doc_generator: + logger.info( + "No metadata (category name) specified in the payload for this document generator.", + ) + category_name = "" + attributes = {} + category_data = {} + else: + category_name = doc_generator["category_name"] + if not "attributes" in doc_generator: + logger.error( + "To generate documents for workspaces of type -> '%s' with metadata, the attributes needs to be specified in the payload! Skipping to next document generator...", + workspace_type, + ) + success = False + continue + attributes = doc_generator["attributes"] + + # The following method returns two values: the category ID and + # a dict of the attributes. If the category is not found + # on the document template it returns -1 for the category ID + # and an empty dict for the attribute definitions: + ( + category_id, + attribute_definitions, + ) = self._otcs.get_node_category_definition(template_id, category_name) + if category_id == -1: + logger.error( + "The document template -> '%s' does not have the specified category -> %s. Skipping to next document generator...", + template_name, + category_name, + ) + success = False + continue + + category_data = {str(category_id): {}} + + # now we fill the prepared (but empty) category_data + # with the actual attribute values from the payload: + for attribute in attributes: + attribute_name = attribute["name"] + attribute_value = attribute["value"] + attribute_type = attribute_definitions[attribute_name]["type"] + attribute_id = attribute_definitions[attribute_name]["id"] + + # Special treatment for type user: determine the ID for the login name. + # the ID is the actual value we have to put in the attribute: + if attribute_type == "user": + user = self._otcs.get_user( + name=attribute_value, show_error=True + ) + user_id = self._otcs.get_result_value(response=user, key="id") + if not user_id: + logger.error( + "Cannot find user with login name -> '%s'. Skipping...", + attribute_value, + ) + success = False + continue + attribute_value = user_id + category_data[str(category_id)][attribute_id] = attribute_value + + if not "workspace_folder_path" in doc_generator: + logger.info( + "No workspace folder path defined for workspaces of type -> '%s'. Documents will be stored in workspace root.", + workspace_type, + ) + workspace_folder_path = [] + else: + workspace_folder_path = doc_generator["workspace_folder_path"] + + if "exec_as_user" in doc_generator: + exec_as_user = doc_generator["exec_as_user"] + + # Find the user in the users payload: + exec_user = next( + (item for item in self._users if item["name"] == exec_as_user), + None, ) + # Have we found the user in the payload? 
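The optional `exec_as_user` handling that follows impersonates a user from the payload before generating documents. Summarizing the keys this generator reads, an illustrative `documentGenerators` payload element (all values hypothetical) could look like this sketch:

```python
# Illustrative "documentGenerators" payload element; values are hypothetical,
# keys match what the document-generator method reads:
doc_generators = [
    {
        "enabled": True,
        "workspace_type": "Customer",                          # hypothetical workspace type name
        "template_path": ["Customer", "Welcome Letter"],       # hypothetical path in the Document Template Volume (20541)
        "classification_path": ["Correspondence", "Letters"],  # hypothetical path in the Classification Volume (198)
        "category_name": "Customer Data",                      # optional; if given, "attributes" is required
        "attributes": [
            # hypothetical attribute; user-typed attributes are resolved to user IDs:
            {"name": "Account Manager", "value": "pgrant"},
        ],
        "workspace_folder_path": ["Correspondence"],           # optional; defaults to the workspace root
        "exec_as_user": "pgrant",                              # optional; impersonate this payload user
    }
]
```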
+ if exec_user is not None: + logger.info( + "Executing document generator with user -> %s", exec_as_user + ) + # we change the otcs credentials to the user: + self._otcs.set_credentials(exec_user["name"], exec_user["password"]) + + # we re-authenticate as the user: + logger.info("Authenticate user -> '%s'...", exec_as_user) + # True = force new login with new user + cookie = self._otcs.authenticate(revalidate=True) + if not cookie: + logger.error("Couldn't authenticate user -> '%s'", exec_as_user) + continue + admin_context = False + authenticated_user = exec_as_user + else: + logger.error( + "Cannot find user with login name -> '%s' for executing. Executing as admin...", + exec_as_user, + ) + admin_context = True + success = False else: + admin_context = True + exec_as_user = "admin" + + if admin_context and authenticated_user != "admin": + # Set back admin credentials: + self._otcs.set_credentials( + admin_credentials["username"], admin_credentials["password"] + ) + + # we re-authenticate as the admin user: logger.info( - "Profile for user -> %s has been updated to enable responsive container mode.", - user_name, + "Authenticate as admin user -> '%s'...", + admin_credentials["username"], ) - response = self._otcs.update_user_profile( - field="responsiveContainerMessageMode", - value=True, - config_section="SmartUI", - ) - if response is None: - logger.warning( - "Profile for user -> %s couldn't be updated with responsive container message mode'!", - user_name, + # True = force new login with new user + cookie = self._otcs.authenticate(revalidate=True) + authenticated_user = "admin" + + if category_data: + logger.info( + "Generate documents for workspace type -> '%s' based on template -> '%s' with metadata -> %s...", + workspace_type, + template_name, + category_data, ) else: logger.info( - "Profile for user -> %s has been updated to enable messages for responsive container mode.", - user_name, + "Generate documents for workspace type -> '%s' based on template -> '%s' without metadata...", + workspace_type, + template_name, ) - # we work through the list of favorites defined for the user: - favorites = user["favorites"] - for favorite in favorites: - # check if favorite is a logical workspace name - favorite_item = next( - (item for item in self._workspaces if item["id"] == favorite), None + # Find the workspace type with the name given in the _workspace_types + # datastructure that has been generated by process_workspace_types() method before: + workspace_type_id = next( + ( + item["id"] + for item in self._workspace_types + if item["name"] == workspace_type + ), + None, + ) + workspace_instances = self._otcs.get_workspace_instances( + type_name=workspace_type, type_id=workspace_type_id + ) + if not workspace_instances or not workspace_instances["results"]: + logger.warning( + "No workspace instances found for workspace type -> '%s' (%s)", + workspace_type, + workspace_type_id, ) - is_workspace = False - if favorite_item: - logger.info( - "Found favorite item (workspace) in payload -> %s", - favorite_item["name"], + for workspace_instance in workspace_instances["results"]: + workspace_id = workspace_instance["data"]["properties"]["id"] + workspace_name = workspace_instance["data"]["properties"]["name"] + if workspace_folder_path: + workspace_folder = self._otcs.get_node_by_workspace_and_path( + workspace_id=workspace_id, path=workspace_folder_path ) - favorite_id = self.determine_workspace_id(favorite_item) - if not favorite_id: - logger.warning( - "Workspace of type -> %s and name -> 
%s does not exist. Cannot create favorite. Skipping...", - favorite_item["type_name"], - favorite_item["name"], + if workspace_folder: + workspace_folder_id = self._otcs.get_result_value( + workspace_folder, "id" ) - continue - - is_workspace = True - else: - # alternatively try to find the item as a nickname: - favorite_item = self._otcs.get_node_from_nickname(favorite) - favorite_id = self._otcs.get_result_value(favorite_item, "id") - # if favorite_item is None: - if favorite_id is None: + else: + # If the workspace template is not matching + # the path we may have an error here. Then + # we fall back to workspace root level. logger.warning( - "Favorite -> %s neither found as workspace ID nor as nickname. Skipping to next favorite...", - favorite, + "Folder path does not exist in workspace -> '%s'. Using workspace root level instead...", + workspace_name, ) - continue - - response = self._otcs.add_favorite(favorite_id) - if response is None: - logger.warning( - "Favorite ID -> %s couldn't be added for user -> %s!", - favorite_id, - user_name, - ) + workspace_folder_id = workspace_id else: - logger.info( - "Added favorite for user -> %s, node ID -> %s.", - user_name, - favorite_id, - ) - logger.info( - "Simulate user -> %s browsing node ID -> %s.", - user_name, - favorite_id, - ) - # simulate a browse by the user to populate recently accessed items - if is_workspace: - response = self._otcs.get_workspace(favorite_id) - else: - response = self._otcs.get_node(favorite_id) + workspace_folder_id = workspace_id - # we work through the list of proxies defined for the user - # (we need to consider that not all users have the proxies element): - proxies = user["proxies"] if user.get("proxies") else [] + document_name = workspace_name + " - " + template_name + logger.info("Generate document -> '%s'", document_name) - for proxy in proxies: - proxy_user = next( - (item for item in self._users if item["name"] == proxy), - None, + response = self._otcs.check_node_name( + int(workspace_folder_id), document_name ) - if not proxy_user or not "id" in proxy_user: - logger.error( - "The proxy -> %s for user -> %s does not exist! Skipping proxy...", - proxy, - user_name, + if response["results"]: + logger.warning( + "Node with name -> '%s' does already exist in workspace folder with ID -> %s", + document_name, + workspace_folder_id, ) - success = False continue - proxy_user_id = proxy_user["id"] - - # Check if the proxy is already set: - if not self._otcs.is_proxy(proxy): - logger.info( - "Set user -> %s (%s) as proxy for user -> %s.", - proxy, - proxy_user_id, - user_name, + response = self._otcs.create_document_from_template( + int(template_id), + int(workspace_folder_id), + int(classification_id), + category_data, + document_name, + "This document has been auto-generated by Terrarium", + ) + if not response: + logger.error( + "Failed to generate document -> '%s' in workspace -> '%s' (%s) as user -> %s", + document_name, + workspace_name, + workspace_id, + exec_as_user, ) - # set the user proxy - currently we don't support time based proxies in payload. - # The called method is ready to support this. - response = self._otcs.add_user_proxy(proxy_user_id) + success = False else: logger.info( - "User -> %s (%s) is already proxy for user -> %s. 
Skipping...", - proxy, - proxy_user_id, - user_name, + "Successfully generated document -> '%s' in workspace -> '%s'", + document_name, + workspace_name, ) - if self._users: + + if authenticated_user != "admin": # Set back admin credentials: self._otcs.set_credentials( admin_credentials["username"], admin_credentials["password"] ) - # we re-authenticate as the admin user: + # we authenticate back as the admin user: logger.info( - "Authenticate as admin user -> %s...", admin_credentials["username"] + "Authenticate as admin user -> '%s'...", admin_credentials["username"] ) # True = force new login with new user cookie = self._otcs.authenticate(revalidate=True) - # Also for the admin user we want to update the user profile to activate responsive (dynamic) containers: - response = self._otcs.update_user_profile( - field="responsiveContainerMode", - value=True, - config_section="SmartUI", - ) - if response is None: - logger.warning( - "Profile for admin user couldn't be updated with responsive container mode'!", - ) - else: - logger.info( - "Profile for admin user has been updated to enable responsive container mode.", - ) - response = self._otcs.update_user_profile( - field="responsiveContainerMessageMode", - value=True, - config_section="SmartUI", - ) - if response is None: - logger.warning( - "Profile for admin user couldn't be updated with responsive container message mode'!", - ) - else: - logger.info( - "Profile for admin user has been updated to enable messages for responsive container mode.", - ) - - self.write_status_file(success, section_name, self._users) + self.write_status_file(success, section_name, self._doc_generators) return success - # end method definition + # end method definition - def process_security_clearances( - self, section_name: str = "securityClearances" + def process_browser_automations( + self, + browser_automations: list, + section_name: str = "browserAutomations", + check_status: bool = True, ) -> bool: - """Process Security Clearances for Extended ECM. + """Process Selenium-based browser automations. Args: + browser_automations (list): list of browser_automations (need this as parameter as we + have multiple lists) section_name (str, optional): name of the section. It can be overridden for cases where multiple sections of same type are used (e.g. the "Post" sections) This name is also used for the "success" status files written to the Admin Personal Workspace + check_status (bool, optional): defines whether or not this needs to re-run + for each customizer run (even if it has been successful before). + If check_status is True (default) then it is only re-run + if it has NOT been successfully before. Returns: bool: True if payload has been processed without errors, False otherwise """ - if not self._security_clearances: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + if not browser_automations: + logger.info("Payload section -> '%s' is empty. 
Skipping...", section_name) return True # If this payload section has been processed successfully before we # can return True and skip processing it once more: - if self.check_status_file(section_name): + if check_status and self.check_status_file(section_name): return True success: bool = True - for security_clearance in self._security_clearances: - clearance_level = security_clearance.get("level") - clearance_name = security_clearance.get("name") + for browser_automation in browser_automations: + description = browser_automation.get("description", "") - if "enabled" in security_clearance and not security_clearance["enabled"]: + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in browser_automation and not browser_automation["enabled"]: logger.info( - "Payload for Security Clearance -> %s is disabled. Skipping...", - clearance_name, + "Payload for Browser Automation -> '%s' is disabled. Skipping...", + description, ) continue - clearance_description = security_clearance.get("description") - if not clearance_description: - clearance_description = "" - if clearance_level and clearance_name: + if not "name" in browser_automation: + logger.error("Browser automation is missing a unique name. Skipping...") + success = False + continue + name = browser_automation.get("name") + + if description: logger.info( - "Creating Security Clearance -> %s : %s", - clearance_level, - clearance_name, - ) - self._otcs.run_web_report( - "web_report_security_clearance", security_clearance + "Processing Browser Automation -> '%s' (%s)...", + name, + description, ) else: - logger.error( - "Cannot create Security Clearance - either level or name is missing!" - ) - success = False - - self.write_status_file(success, section_name, self._security_clearances) - - return success - - # end method definition - - def process_supplemental_markings( - self, section_name: str = "supplementalMarkings" - ) -> bool: - """Process Supplemental Markings for Extended ECM. - - Args: - section_name (str, optional): name of the section. It can be overridden - for cases where multiple sections of same type - are used (e.g. the "Post" sections) - This name is also used for the "success" status - files written to the Admin Personal Workspace - Returns: - bool: True if payload has been processed without errors, False otherwise - """ - - if not self._supplemental_markings: - logger.info("Payload section -> %s is empty. Skipping...", section_name) - return True + logger.info("Processing Browser Automation -> '%s'...", name) - # If this payload section has been processed successfully before we - # can return True and skip processing it once more: - if self.check_status_file(section_name): - return True - - success: bool = True + if not "base_url" in browser_automation: + logger.error("Browser automation is missing base_url. Skipping...") + success = False + continue + base_url = browser_automation.get("base_url") - for supplemental_marking in self._supplemental_markings: - code = supplemental_marking.get("code") + if not "user_name" in browser_automation: + logger.info("Browser automation is not having user name.") + user_name = browser_automation.get("user_name", "") - if ( - "enabled" in supplemental_marking - and not supplemental_marking["enabled"] - ): - logger.info( - "Payload for Supplemental Marking -> %s is disabled. 
Skipping...", - code, + if not "password" in browser_automation: + logger.info("Browser automation is not having password.") + password = browser_automation.get("password", "") + + if not "automations" in browser_automation: + logger.error( + "Browser automation is missing list of automations. Skipping..." ) + success = False continue + automations = browser_automation.get("automations", []) - description = supplemental_marking.get("description") - if not description: - description = "" - if code: + debug_automation: bool = browser_automation.get("debug", False) + + # Create Selenium Browser Automation: + logger.info("Browser Automation base URL -> %s", base_url) + logger.info("Browser Automation User -> %s", user_name) + logger.debug("Browser Automation Password -> %s", password) + browser_automation_object = BrowserAutomation( + base_url=base_url, + user_name=user_name, + user_password=password, + automation_name=name, + take_screenshots=debug_automation, + ) + # Implicit Wait is a global setting (for whole brwoser session) + # This makes sure a page is fully loaded and elements are present + # before accessing them. We set 15.0 seconds as default if not + # otherwise specified by "wait_time" in the payload. + # See https://www.selenium.dev/documentation/webdriver/waits/ + wait_time = browser_automation.get("wait_time", 15.0) + browser_automation_object.implicit_wait(wait_time) + if "wait_time" in browser_automation: logger.info( - "Creating Supplemental Marking -> %s : %s", code, description + "Browser Automation Implicit Wait time -> '%s' configured", + wait_time, ) - self._otcs.run_web_report( - "web_report_supplemental_marking", supplemental_marking - ) - else: - logger.error( - "Cannot create Supplemental Marking - either code or description is missing!" - ) - success = False - self.write_status_file(success, section_name, self._supplemental_markings) + for automation in automations: + if not "type" in automation: + logger.error("Browser automation step is missing type. Skipping...") + success = False + break + automation_type = automation.get("type", "") + + match automation_type: + case "login": + page = automation.get("page", "") + logger.info( + "Login to -> %s as user -> %s", base_url + page, user_name + ) + user_field = automation.get("user_field", "otds_username") + password_field = automation.get( + "password_field", "otds_password" + ) + login_button = automation.get("login_button", "loginbutton") + if not browser_automation_object.run_login( + page=page, + user_field=user_field, + password_field=password_field, + login_button=login_button, + ): + logger.error( + "Cannot log into -> %s. Stopping automation.", + base_url + page, + ) + success = False + break + else: + logger.info( + "Successfully logged into page -> %s.", base_url + page + ) + case "get_page": + page = automation.get("page", "") + if not page: + logger.error( + "Automation type -> '%s' requires page parameter", + automation_type, + ) + success = False + break + logger.info("Get page -> %s", base_url + page) + if not browser_automation_object.get_page(url=page): + logger.error( + "Cannot get page -> %s. Stopping automation.", + page, + ) + success = False + break + else: + # browser_automation_object.implicit_wait(15.0) # this is global but not command-specific! Don't need it here! 
+ logger.info( + "Successfully loaded page -> %s", base_url + page ) + case "click_elem": + elem = automation.get("elem", "") + if not elem: + logger.error( + "Automation type -> '%s' requires elem parameter", + automation_type, + ) + success = False + break + find = automation.get("find", "id") + show_error = automation.get("show_error", True) + if not browser_automation_object.find_elem_and_click( + find_elem=elem, find_method=find, show_error=show_error + ): + logger.error( + "Cannot find clickable element -> '%s' on current page. Stopping automation.", + elem, + ) + success = False + break + else: + logger.info("Successfully clicked element -> %s", elem) + case "set_elem": + elem = automation.get("elem", "") + if not elem: + logger.error( + "Automation type -> '%s' requires elem parameter", + automation_type, + ) + success = False + break + find = automation.get("find", "id") + value = automation.get("value", "") + if not value: + logger.error( + "Automation type -> '%s' requires value parameter", + automation_type, + ) + success = False + break + # we also support replacing placeholders that are + # enclosed in double % characters like %%OTCS_RESOURCE_ID%%: + value = self.replace_placeholders(value) + if not browser_automation_object.find_elem_and_set( + find_elem=elem, elem_value=value, find_method=find + ): + logger.error( + "Cannot find element -> '%s' on current page to set value -> '%s'. Stopping automation.", + elem, + value, + ) + success = False + break + else: + logger.info( + "Successfully set element -> '%s' to value -> '%s'.", + elem, + value, + ) + case _: + logger.error( + "Illegal automation step type -> '%s' in browser automation!", + automation_type, + ) + success = False + break + + if check_status: + self.write_status_file(success, section_name, browser_automations) return success - # end method definition + # end method definition - def process_user_security(self, section_name: str = "userSecurity"): - """Process Security Clearance and Supplemental Markings for Extended ECM users. + def init_sap( + self, sap_external_system: dict, direct_application_server_login: bool = True + ) -> SAP | None: + """Initialize SAP object for RFC communication with SAP S/4HANA. Args: - section_name (str, optional): name of the section. It can be overridden - for cases where multiple sections of same type - are used (e.g. the "Post" sections) - This name is also used for the "success" status - files written to the Admin Personal Workspace + sap_external_system (dict): SAP external system created before + direct_application_server_login (bool): flag to control whether we communicate directly with + the SAP application server or via a load balancer Returns: - bool: True if payload has been processed without errors, False otherwise + SAP: SAP object """ - if not self._users: - logger.info("Payload section -> %s is empty.
Skipping...", section_name) - return True + if not sap_external_system: + return None - # If this payload section has been processed successfully before we - # can return True and skip processing it once more: - if self.check_status_file(section_name): - return True + username = sap_external_system["username"] + password = sap_external_system["password"] + # "external_system_hostname" is extracted from as_url in process_external_systems() + host = sap_external_system["external_system_hostname"] + client = sap_external_system.get("client", "100") + system_number = sap_external_system.get("external_system_number", "00") + system_id = sap_external_system["external_system_name"] + group = sap_external_system.get("group", "PUBLIC") + destination = sap_external_system.get("destination", "") - success: bool = True + logger.info("Connection parameters SAP:") + logger.info("SAP Hostname = %s", host) + logger.info("SAP Client = %s", client) + logger.info("SAP System Number = %s", system_number) + logger.info("SAP System ID = %s", system_id) + logger.info("SAP User Name = %s", username) + if not direct_application_server_login: + logger.info("SAP Group Name (for RFC) = %s", group) + if destination: + logger.info("SAP Destination = %s", destination) - for user in self._users: - user_id = user.get("id") - user_name = user.get("name") + if direct_application_server_login: + logger.info("SAP Login = Direct Application Server (ashost)") + sap_object = SAP( + username=username, + password=password, + ashost=host, + client=client, + system_number=system_number, + system_id=system_id, + destination=destination, + ) + else: + logger.info("SAP Login = Logon with load balancing (mshost)") + sap_object = SAP( + username=username, + password=password, + mshost=host, + group=group, + client=client, + system_number=system_number, + system_id=system_id, + destination=destination, + ) - # Check if element has been disabled in payload (enabled = false). - # In this case we skip the element: - if "enabled" in user and not user["enabled"]: - logger.info( - "Payload for User -> %s is disabled. Skipping...", user_name - ) - continue + self._sap = sap_object - # Read security clearance from user payload (it is optional!) - user_security_clearance = user.get("security_clearance") - if user_id and user_security_clearance: - self._otcs.assign_user_security_clearance( - user_id, user_security_clearance + if ( + "archive_logical_name" in sap_external_system + and "archive_certificate_file" in sap_external_system + and self._otac + ): + logger.info( + "Put certificate file -> '%s' for logical archive -> '%s' into Archive Center", + sap_external_system["archive_certificate_file"], + sap_external_system["archive_logical_name"], + ) + response = self._otac.put_cert( + sap_external_system["external_system_name"], + sap_external_system["archive_logical_name"], + sap_external_system["archive_certificate_file"], + ) + if not response: + logger.error("Failed to install Archive Center certificate!") + else: + logger.info( + "Enable certificate file -> '%s' for logical archive -> '%s'", + sap_external_system["archive_certificate_file"], + sap_external_system["archive_logical_name"], ) - - # Read supplemental markings from user payload (it is optional!) 
- user_supplemental_markings = user.get("supplemental_markings") - if user_id and user_supplemental_markings: - self._otcs.assign_user_supplemental_markings( - user_id, user_supplemental_markings + response = self._otac.enable_cert( + sap_external_system["external_system_name"], + sap_external_system["archive_logical_name"], + True, ) + if not response: + logger.debug("Failed to enable Archive Center certificate!") - self.write_status_file(success, section_name, self._users) - - return success + return sap_object - # end method definition + # end method definition - def process_records_management_settings( - self, section_name: str = "recordsManagementSettings" - ) -> bool: - """Process Records Management Settings for Extended ECM. - The setting files need to be placed in the OTCS file system file via - a transport into the Support Asset Volume. + def process_sap_rfcs(self, sap_object: SAP, section_name: str = "sapRFCs") -> bool: + """Process SAP RFCs in payload and run them in SAP S/4HANA. Args: + sap_object (SAP): SAP object section_name (str, optional): name of the section. It can be overridden for cases where multiple sections of same type are used (e.g. the "Post" sections) @@ -7711,9 +12604,9 @@ def process_records_management_settings( bool: True if payload has been processed without errors, False otherwise """ - if not self._records_management_settings: - logger.info("Payload section -> %s is empty. Skipping...", section_name) - return True + if not sap_object: + logger.info("SAP object is undefined. Cannot call RFCs. Bailing out.") + return False # If this payload section has been processed successfully before we # can return True and skip processing it once more: @@ -7722,2190 +12615,4616 @@ def process_records_management_settings( success: bool = True - if ( - "records_management_system_settings" in self._records_management_settings - and self._records_management_settings["records_management_system_settings"] - != "" - ): - filename = ( - self._custom_settings_dir - + self._records_management_settings[ - "records_management_system_settings" - ] - ) - response = self._otcs.import_records_management_settings(filename) - if not response: - success = False + for sap_rfc in self._sap_rfcs: + rfc_name = sap_rfc["name"] - if ( - "records_management_codes" in self._records_management_settings - and self._records_management_settings["records_management_codes"] != "" - ): - filename = ( - self._custom_settings_dir - + self._records_management_settings["records_management_codes"] - ) - response = self._otcs.import_records_management_codes(filename) - if not response: - success = False + # Check if element has been disabled in payload (enabled = false). + # In this case we skip the element: + if "enabled" in sap_rfc and not sap_rfc["enabled"]: + logger.info( + "Payload for SAP RFC -> '%s' is disabled. 
Skipping...", rfc_name + ) + continue - if ( - "records_management_rsis" in self._records_management_settings - and self._records_management_settings["records_management_rsis"] != "" - ): - filename = ( - self._custom_settings_dir - + self._records_management_settings["records_management_rsis"] + rfc_description = ( + sap_rfc["description"] if sap_rfc.get("description") else "" ) - response = self._otcs.import_records_management_rsis(filename) - if not response: - success = False - if ( - "physical_objects_system_settings" in self._records_management_settings - and self._records_management_settings["physical_objects_system_settings"] - != "" - ): - filename = ( - self._custom_settings_dir - + self._records_management_settings["physical_objects_system_settings"] - ) - response = self._otcs.import_physical_objects_settings(filename) - if not response: - success = False + # be careful to avoid key errors as SAP RFC parameters are optional: + rfc_params = sap_rfc["parameters"] if sap_rfc.get("parameters") else {} + if rfc_params: + logger.info( + "Calling SAP RFC -> '%s' (%s) with parameters -> %s ...", + rfc_name, + rfc_description, + rfc_params, + ) + else: + logger.info( + "Calling SAP RFC -> '%s' (%s) without parameters...", + rfc_name, + rfc_description, + ) - if ( - "physical_objects_codes" in self._records_management_settings - and self._records_management_settings["physical_objects_codes"] != "" - ): - filename = ( - self._custom_settings_dir - + self._records_management_settings["physical_objects_codes"] + # be careful to avoid key errors as SAP RFC parameters are optional: + rfc_call_options = ( + sap_rfc["call_options"] if sap_rfc.get("call_options") else {} ) - response = self._otcs.import_physical_objects_codes(filename) - if not response: - success = False + if rfc_call_options: + logger.debug("Using call options -> '%s' ...", rfc_call_options) - if ( - "physical_objects_locators" in self._records_management_settings - and self._records_management_settings["physical_objects_locators"] != "" - ): - filename = ( - self._custom_settings_dir - + self._records_management_settings["physical_objects_locators"] - ) - response = self._otcs.import_physical_objects_locators(filename) - if not response: + result = sap_object.call(rfc_name, rfc_call_options, rfc_params) + if result is None: + logger.error("Failed to call SAP RFC -> '%s'", rfc_name) success = False - - if ( - "security_clearance_codes" in self._records_management_settings - and self._records_management_settings["security_clearance_codes"] != "" - ): - filename = ( - self._custom_settings_dir - + self._records_management_settings["security_clearance_codes"] - ) - response = self._otcs.import_security_clearance_codes(filename) - if not response: + elif result.get("RESULT") != "OK": + logger.error( + "Result of SAP RFC -> '%s' is not OK, it returned -> '%s' failed items in result -> %s", + rfc_name, + str(result.get("FAILED")), + str(result), + ) success = False + else: + logger.info( + "Successfully called RFC -> '%s'. Result -> %s", + rfc_name, + str(result), + ) + # Save result for status file content + sap_rfc["result"] = result - self.write_status_file(success, section_name, self._records_management_settings) + self.write_status_file(success, section_name, self._sap_rfcs) return success - # end method definition + # end method definition - def process_holds(self, section_name: str = "holds") -> bool: - """Process Records Management Holds for Extended ECM users. 
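+    # Illustrative sketch of a single "sapRFCs" payload element as consumed by
+    # process_sap_rfcs() above. The RFC name and parameter values are
+    # hypothetical placeholders, not actual product RFCs:
+    #
+    # {
+    #     "enabled": True,
+    #     "name": "Z_EXAMPLE_RFC",
+    #     "description": "Example RFC call",
+    #     "parameters": {"IV_EXAMPLE": "X"},  # optional
+    #     "call_options": {},  # optional
+    # }
+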
+    def init_successfactors(
+        self, sucessfactors_external_system: dict
+    ) -> SuccessFactors | None:
+        """Initialize SuccessFactors object for workspace creation. This is needed to
+        synchronize user passwords and emails with SuccessFactors.

        Args:
-            section_name (str, optional): name of the section. It can be overridden
-                                          for cases where multiple sections of same type
-                                          are used (e.g. the "Post" sections)
-                                          This name is also used for the "success" status
-                                          files written to the Admin Personal Workspace
+            sucessfactors_external_system (dict): SuccessFactors external system created before

        Returns:
-            bool: True if payload has been processed without errors, False otherwise
+            SuccessFactors | None: SuccessFactors object, or None if the external system payload is missing
        """

-        if not self._holds:
-            logger.info("Payload section -> %s is empty. Skipping...", section_name)
-            return True
+        def extract_company_from_url(url: str) -> str:
+            parsed_url = urlparse(url)
+            query_params = parse_qs(parsed_url.query)
+            company_value = query_params.get("company", [""])[0]
+            return company_value

-        # If this payload section has been processed successfully before we
-        # can return True and skip processing it once more:
-        if self.check_status_file(section_name):
-            return True
+        if not sucessfactors_external_system:
+            return None

-        success: bool = True
+        username = sucessfactors_external_system["username"]
+        password = sucessfactors_external_system["password"]
+        base_url = sucessfactors_external_system["base_url"]
+        as_url = sucessfactors_external_system["as_url"]
+        saml_url = sucessfactors_external_system.get("saml_url", "")
+        company_id = extract_company_from_url(saml_url)
+        client_id = sucessfactors_external_system["oauth_client_id"]
+        client_secret = sucessfactors_external_system["oauth_client_secret"]
+
+        logger.info("Connection parameters SuccessFactors:")
+        logger.info("SuccessFactors base URL = %s", base_url)
+        logger.info("SuccessFactors application URL = %s", as_url)
+        logger.info("SuccessFactors username = %s", username)
+        logger.debug("SuccessFactors password = %s", password)
+        logger.info("SuccessFactors client ID = %s", client_id)
+        logger.debug("SuccessFactors client secret = %s", client_secret)
+        logger.info("SuccessFactors company ID (tenant) = %s", company_id)
+        successfactors_object = SuccessFactors(
+            base_url=base_url,
+            as_url=as_url,
+            client_id=client_id,
+            client_secret=client_secret,
+            username=username,
+            password=password,
+            company_id=company_id,
+        )

-        for hold in self._holds:
-            if not "name" in hold:
-                logger.error("Cannot create Hold without a name! Skipping...")
-                continue
-            hold_name = hold["name"]
+        self._successfactors = successfactors_object

-            if not "type" in hold:
-                logger.error(
-                    "Cannot create Hold -> %s without a type! Skipping...", hold_name
-                )
-                success = False
-                continue
-            hold_type = hold["type"]
+        return successfactors_object

-            # Check if element has been disabled in payload (enabled = false).
-            # In this case we skip the element:
-            if "enabled" in hold and not hold["enabled"]:
-                logger.info(
-                    "Payload for Hold -> %s is disabled. Skipping...", hold_name
-                )
-                continue
+    # end method definition

-            hold_group = hold.get("group")
-            hold_comment = hold.get("comment")
-            hold_alternate_id = hold.get("alternate_id")
-            hold_date_applied = hold.get("date_applied")
-            hold_date_suspend = hold.get("date_to_remove")
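+    # Illustrative sketch of the dictionary expected by init_successfactors()
+    # above; all values are hypothetical placeholders:
+    #
+    # {
+    #     "username": "<technical user>",
+    #     "password": "<secret>",
+    #     "base_url": "<SuccessFactors API base URL>",
+    #     "as_url": "<SuccessFactors application URL>",
+    #     "saml_url": "<SAML endpoint URL including a ?company=... parameter>",
+    #     "oauth_client_id": "<client ID>",
+    #     "oauth_client_secret": "<client secret>",
+    # }
+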
+    def init_salesforce(self, salesforce_external_system: dict) -> Salesforce | None:
+        """Initialize Salesforce object for workspace creation. This is needed to query the Salesforce API
+        to look up IDs of Salesforce objects.

-        # 550 is the RM Volume
-        response = self._otcs.get_node_by_volume_and_path(550, ["Hold Maintenance"])
-        if not response:
-            logger.error("Cannot find Records Management Volume!")
-            continue
-        holds_maintenance_id = self._otcs.get_result_value(response, "id")
-        if not holds_maintenance_id:
-            logger.error(
-                "Cannot find Holds Maintenance folder in Records Management Volume!"
-            )
-            continue

+        Args:
+            salesforce_external_system (dict): Salesforce external system created before
+        Returns:
+            Salesforce: Salesforce object
+        """

-        if hold_group:
-            # Check if the Hold Group (folder) does already exist.
-            response = self._otcs.get_node_by_parent_and_name(
-                holds_maintenance_id, hold_group
-            )
-            parent_id = self._otcs.get_result_value(response, "id")
-            if not parent_id:
-                response = self._otcs.create_item(
-                    holds_maintenance_id, "833", hold_group
-                )
-                parent_id = self._otcs.get_result_value(response, "id")
-                if not parent_id:
-                    logger.error("Failed to create hold group -> %s", hold_group)
-                    continue
-        else:
-            parent_id = holds_maintenance_id
+        if not salesforce_external_system:
+            return None
+
+        username = salesforce_external_system["username"]
+        password = salesforce_external_system["password"]
+        base_url = salesforce_external_system["base_url"]
+        authorization_url = salesforce_external_system.get("token_endpoint", "")
+        client_id = salesforce_external_system["oauth_client_id"]
+        client_secret = salesforce_external_system["oauth_client_secret"]
+
+        logger.info("Connection parameters Salesforce:")
+        logger.info("Salesforce base URL = %s", base_url)
+        logger.info("Salesforce authorization URL = %s", authorization_url)
+        logger.info("Salesforce username = %s", username)
+        logger.debug("Salesforce password = %s", password)
+        logger.info("Salesforce client ID = %s", client_id)
+        logger.debug("Salesforce client secret = %s", client_secret)
+        salesforce_object = Salesforce(
+            base_url=base_url,
+            client_id=client_id,
+            client_secret=client_secret,
+            username=username,
+            password=password,
+            authorization_url=authorization_url,
+        )
+
+        self._salesforce = salesforce_object
+
+        return salesforce_object
+
+    # end method definition
+
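+    # Illustrative sketch of the dictionary expected by init_salesforce()
+    # above; all values are hypothetical placeholders:
+    #
+    # {
+    #     "username": "<admin user>",
+    #     "password": "<secret>",
+    #     "base_url": "<Salesforce base URL>",
+    #     "token_endpoint": "<OAuth token endpoint>",  # optional
+    #     "oauth_client_id": "<client ID>",
+    #     "oauth_client_secret": "<client secret>",
+    # }
+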
+    def process_bulk_datasource_otcs(self, data_source: dict) -> Data:
+        """Load data from Extended ECM / Content Server data source into the data frame of the Data class (see helper/data.py)
+
+        Args:
+            data_source (dict): Payload dict element for the data source
+
+        Returns:
+            Data: Data class that includes a Pandas DataFrame
+
+        Side Effects:
+            self._otcs_source is set to the OTCS object created by this method
+        """

-        # Holds are special - they ahve folders that cannot be traversed
-        # in the normal way - we need to get the whole list of holds and use
-        # specialparameters for the exist_result_items() method as the REST
-        # API calls delivers a results->data->holds structure (not properties)
-        response = self._otcs.get_records_management_holds()
-        if self._otcs.exist_result_item(
-            response, "HoldName", hold_name, property_name="holds"
-        ):
-            logger.info("Hold -> %s does already exist. Skipping...", hold_name)
-            continue

+        # 1. Read and validate values from the data source payload:
+        otcs_hostname = data_source.get("otcs_hostname", "")
+        if not otcs_hostname:
+            logger.error(
+                "Content Server hostname (otcs_hostname) is not specified in payload of bulk data source. Cannot load data!"
+            )
+            return None
+        otcs_protocol = data_source.get("otcs_protocol", "https")
+        otcs_port = data_source.get("otcs_port", "443")
+        otcs_basepath = data_source.get("otcs_basepath", "/cs/cs")
+        otcs_username = data_source.get("otcs_username", "")
+        otcs_password = data_source.get("otcs_password", "")
+        if not otcs_username or not otcs_password:
+            logger.error(
+                "Content Server user name (otcs_username) or password (otcs_password) are missing in payload of bulk data source. Cannot load data!"
+            )
+            return None
+        otcs_thread_number = data_source.get("otcs_thread_number", BULK_THREAD_NUMBER)
+        otcs_download_dir = data_source.get("otcs_download_dir", "/data/contentserver")
+        otcs_root_node_id = data_source.get("otcs_root_node_id")
+        # Filter workspace by depth under the given root (only consider items as workspace if they have the right depth in the hierarchy):
+        otcs_filter_workspace_depth = data_source.get("otcs_filter_workspace_depth", 0)
+        # Filter workspace by subtype (only consider items as workspace if they have the right technical subtype):
+        # This is NOT the workspace type but the technical subtype (like 848 for workspaces and 0 for folder)
+        otcs_filter_workspace_subtypes = data_source.get(
+            "otcs_filter_workspace_subtypes", []
+        )
+        # Filter workspace by category name (only consider items as workspace if they have the category):
+        otcs_filter_workspace_category = data_source.get(
+            "otcs_filter_workspace_category", None
+        )
+        # Filter workspace by attribute values (only consider items as workspace if they have the attributes with the defined values):
+        otcs_filter_workspace_attributes = data_source.get(
+            "otcs_filter_workspace_attributes", None
+        )

-        hold = self._otcs.create_records_management_hold(
-            hold_type,
-            hold_name,
-            hold_comment,
-            hold_alternate_id,
-            int(parent_id),
-            hold_date_applied,
-            hold_date_suspend,
+        if not otcs_root_node_id:
+            logger.error(
+                "Content Server root node ID for traversal is missing in payload of bulk data source. Cannot load data!"
            )
+            return None

-        if hold and hold["holdID"]:
-            logger.info(
-                "Successfully created hold -> %s with ID -> %s",
-                hold_name,
-                hold["holdID"],
-            )
-        else:
-            success = False
+        logger.info(
+            "Loading data from Content Server (folder, workspaces, items) from root ID -> %s.",
+            otcs_root_node_id,
+        )

-        self.write_status_file(success, section_name, self._holds)
+        # 2. Creating the OTCS object for the source Content Server:
+        self._otcs_source = OTCS(
+            protocol=otcs_protocol,
+            hostname=otcs_hostname,
+            port=otcs_port,
+            base_path=otcs_basepath,
+            username=otcs_username,
+            password=otcs_password,
+            thread_number=otcs_thread_number,
+            download_dir=otcs_download_dir,
+        )

-        return success
+        # 3. Authenticate at Content Server:
+        self._otcs_source.authenticate()

-    # end method definition
+        # 4. Load the Content Server data into the Data object (Pandas DataFrame):
+        if not self._otcs_source.load_items(
+            node_id=otcs_root_node_id,
+            filter_workspace_depth=otcs_filter_workspace_depth,
+            filter_workspace_subtypes=otcs_filter_workspace_subtypes,
+            filter_workspace_category=otcs_filter_workspace_category,
+            filter_workspace_attributes=otcs_filter_workspace_attributes,
+        ):
+            logger.error("Failure during load of Content Server items!")
+            return None
+        data = self._otcs_source.get_data()
+        if not data:
+            logger.error("Failure during load of Content Server items! No data loaded!")
+            return None

-    def process_additional_group_members(
-        self, section_name: str = "additionalGroupMemberships"
-    ) -> bool:
-        """Process additional groups memberships we want to have in OTDS.
+        return data
+
+    # end method definition
+
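+    # Illustrative sketch of a bulk data source payload element of type "otcs"
+    # as consumed by process_bulk_datasource_otcs() above; hostname, credentials,
+    # and node ID are hypothetical placeholders:
+    #
+    # {
+    #     "name": "otcs-source",
+    #     "type": "otcs",
+    #     "otcs_hostname": "otcs.example.com",
+    #     "otcs_username": "<user>",
+    #     "otcs_password": "<secret>",
+    #     "otcs_root_node_id": 2000,
+    #     "otcs_filter_workspace_depth": 2,  # optional
+    #     "otcs_filter_workspace_subtypes": [848],  # optional, 848 = workspace
+    # }
+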
+    def process_bulk_datasource_servicenow(self, data_source: dict) -> Data:
+        """Load data from ServiceNow data source into the data frame of the Data class (see helper/data.py)

        Args:
-            section_name (str, optional): name of the section. It can be overridden
-                                          for cases where multiple sections of same type
-                                          are used (e.g. the "Post" sections)
-                                          This name is also used for the "success" status
-                                          files written to the Admin Personal Workspace
-        Returns:
-            bool: True if payload has been processed without errors, False otherwise
-        """
+            data_source (dict): Payload dict element for the data source

-        if not self._additional_group_members:
-            logger.info("Payload section -> %s is empty. Skipping...", section_name)
-            return True
+        Returns:
+            Data: Data class that includes a Pandas DataFrame

-        # If this payload section has been processed successfully before we
-        # can return True and skip processing it once more:
-        if self.check_status_file(section_name):
-            return True
+        Side Effects:
+            self._servicenow is set to the ServiceNow object created by this method
+        """

-        success: bool = True
+        # 1. Read and validate values from the data source payload:
+        sn_base_url = data_source.get("sn_base_url", "")
+        if not sn_base_url:
+            logger.error(
+                "ServiceNow base URL (sn_base_url) is not specified in payload of bulk data source. Cannot load data!"
+            )
+            return None
+        sn_auth_type = data_source.get("sn_auth_type", "basic")
+        sn_username = data_source.get("sn_username", "")
+        sn_password = data_source.get("sn_password", "")
+        sn_client_id = data_source.get("sn_client_id", None)
+        sn_client_secret = data_source.get("sn_client_secret", None)
+        sn_table_name = data_source.get(
+            "sn_table_name", "u_kb_template_technical_article_public"
+        )
+        sn_query = data_source.get("sn_query", None)
+        sn_thread_number = data_source.get("sn_thread_number", BULK_THREAD_NUMBER)
+        sn_download_dir = data_source.get("sn_download_dir", "/data/knowledgebase")
+        if (
+            sn_base_url
+            and (sn_auth_type == "basic")
+            and (not sn_username or not sn_password)
+        ):
+            logger.error(
+                "ServiceNow Basic Authentication needs username and password in payload!"
+            )
+            return None
+        if (
+            sn_base_url
+            and (sn_auth_type == "oauth")
+            and (not sn_client_id or not sn_client_secret)
+        ):
+            logger.error(
+                "ServiceNow OAuth Authentication needs client ID and client secret in payload!"
+            )
+            return None

-        for additional_group_member in self._additional_group_members:
-            if not "parent_group" in additional_group_member:
-                logger.error("Missing parent_group! Skipping...")
-                continue
-            parent_group = additional_group_member["parent_group"]
+        logger.info(
+            "Loading data from ServiceNow (Knowledge Base Articles) with query -> '%s'",
+            sn_query,
+        )

-            if (
-                "enabled" in additional_group_member
-                and not additional_group_member["enabled"]
-            ):
-                logger.info(
-                    "Payload for Additional Group Member with Parent Group -> %s is disabled. Skipping...",
-                    parent_group,
-                )
-                continue
+        # 2. 
Creating the ServiceNow object: + self._servicenow = ServiceNow( + base_url=sn_base_url, + auth_type=sn_auth_type, + client_id=sn_client_id, + client_secret=sn_client_secret, + username=sn_username, + password=sn_password, + thread_number=sn_thread_number, + download_dir=sn_download_dir, + ) - if (not "user_name" in additional_group_member) and ( - not "group_name" in additional_group_member - ): - logger.error( - "Either group_name or user_name need to be specified! Skipping..." - ) - success = False - continue - if "group_name" in additional_group_member: - group_name = additional_group_member["group_name"] - logger.info( - "Adding group -> %s to parent group -> %s in OTDS.", - group_name, - parent_group, - ) - response = self._otds.add_group_to_parent_group( - group_name, parent_group - ) - if not response: - logger.error( - "Failed to add group -> %s to parent group -> %s in OTDS.", - group_name, - parent_group, - ) - success = False - elif "user_name" in additional_group_member: - user_name = additional_group_member["user_name"] - logger.info( - "Adding user -> %s to group -> %s in OTDS.", user_name, parent_group - ) - response = self._otds.add_user_to_group(user_name, parent_group) - if not response: - logger.error( - "Failed to add user -> %s to group -> %s in OTDS.", - user_name, - parent_group, - ) - success = False + # 3. Authenticate at ServiceNow + auth_data = self._servicenow.authenticate(auth_type=sn_auth_type) + if not auth_data: + logger.error("Failed to authenticate at ServiceNow -> %s", sn_base_url) + return None + else: + logger.info("Successfully authenticated at ServiceNow -> %s", sn_base_url) - self.write_status_file(success, section_name, self._additional_group_members) + # 4. Load the ServiceNow data into the Data object (Pandas DataFrame): + if not self._servicenow.load_articles(table_name=sn_table_name, query=sn_query): + logger.error("Failure during load of ServiceNow articles!") + return None + data = self._servicenow.get_data() + if not data: + logger.error( + "Failure during load of ServiceNow articles! No articles loaded!" + ) + return None - return success + return data - # end method definition + # end method definition - def process_additional_access_role_members( - self, section_name: str = "additionalAccessRoleMemberships" - ) -> bool: - """Process additional access role memberships we want to have in OTDS. + def process_bulk_datasource_otmm(self, data_source: dict) -> Data: + """Load data from OTMM data source into the data frame of the Data class (see helper/data.py) Args: - section_name (str, optional): name of the section. It can be overridden - for cases where multiple sections of same type - are used (e.g. the "Post" sections) - This name is also used for the "success" status - files written to the Admin Personal Workspace + data_source (dict): Payload dict element for the data source + Returns: - bool: True if payload has been processed without errors, False otherwise + Data: Data class that includes a Pandas DataFrame + + Side Effects: + self._otmm is set to the OTMM object created by this method """ - if not self._additional_access_role_members: - logger.info("Payload section -> %s is empty. Skipping...", section_name) - return True + # 1. Read and validate values from the data source payload: + otmm_base_url = data_source.get("otmm_base_url", "") + if not otmm_base_url: + logger.error( + "OTMM base URL (otmm_base_url) is not specified in payload of bulk data source. Cannot load data!" 
+ ) + return None + otmm_username = data_source.get("otmm_username", "") + otmm_password = data_source.get("otmm_password", "") + otmm_client_id = data_source.get("otmm_client_id", None) + otmm_client_secret = data_source.get("otmm_client_secret", None) + otmm_thread_number = data_source.get("otmm_thread_number", BULK_THREAD_NUMBER) + otmm_download_dir = data_source.get("otmm_download_dir", "/data/mediaassets") + otmm_business_unit_exclusions = data_source.get( + "otmm_business_unit_exclusions", [] + ) + otmm_product_exclusions = data_source.get("otmm_product_exclusions", []) - # If this payload section has been processed successfully before we - # can return True and skip processing it once more: - if self.check_status_file(section_name): - return True + logger.info( + "Loading data from OpenText Media Management -> %s (Marketing Assets)...", + otmm_base_url, + ) - success: bool = True + # 2. Creating the OTMM object: + self._otmm = OTMM( + base_url=otmm_base_url, + client_id=otmm_client_id, + client_secret=otmm_client_secret, + username=otmm_username, + password=otmm_password, + thread_number=otmm_thread_number, + download_dir=otmm_download_dir, + business_unit_exclusions=otmm_business_unit_exclusions, + product_exclusions=otmm_product_exclusions, + ) - for additional_access_role_member in self._additional_access_role_members: - if not "access_role" in additional_access_role_member: - logger.error("Missing access_role! Skipping...") - continue - access_role = additional_access_role_member["access_role"] + # 3. Authenticate at OTMM + token = self._otmm.authenticate() + if not token: + logger.error( + "Failed to authenticate at OpenText Media Management -> %s", + otmm_base_url, + ) + return None + else: + logger.info( + "Successfully authenticated at OpenText Media Management -> %s", + otmm_base_url, + ) - if ( - "enabled" in additional_access_role_member - and not additional_access_role_member["enabled"] - ): - logger.info( - "Payload for Additional Member for AccessRole -> %s is disabled. Skipping...", - access_role, - ) - continue + # 4. Load the OTMM assets into the Data object (Pandas DataFrame): + if not self._otmm.load_assets(): + logger.error("Failure during load of OpenText Media Management assets!") + return None + data = self._otmm.get_data() + if not data: + logger.error( + "Failure during load of OpenText Media Management assets! No assets loaded!" + ) + return None - if ( - (not "user_name" in additional_access_role_member) - and (not "group_name" in additional_access_role_member) - and (not "partition_name" in additional_access_role_member) - ): - logger.error( - "Either group_name or user_name need to be specified! Skipping..." 
- ) - success = False - continue - if "group_name" in additional_access_role_member: - group_name = additional_access_role_member["group_name"] - logger.info( - "Adding group -> %s to access role -> %s in OTDS.", - group_name, - access_role, - ) - response = self._otds.add_group_to_access_role(access_role, group_name) - if not response: - logger.error( - "Failed to add group -> %s to access role -> %s in OTDS.", - group_name, - access_role, - ) - success = False - elif "user_name" in additional_access_role_member: - user_name = additional_access_role_member["user_name"] - logger.info( - "Adding user -> %s to access role -> %s in OTDS.", - user_name, - access_role, - ) - response = self._otds.add_user_to_access_role(access_role, user_name) - if not response: - logger.error( - "Failed to add user -> %s to access role -> %s in OTDS.", - user_name, - access_role, - ) - success = False - elif "partition_name" in additional_access_role_member: - partition_name = additional_access_role_member["partition_name"] - logger.info( - "Adding partition -> %s to access role -> %s in OTDS.", - partition_name, - access_role, - ) - response = self._otds.add_partition_to_access_role( - access_role, partition_name - ) - if not response: - logger.error( - "Failed to add partition -> %s to access role -> %s in OTDS.", - partition_name, - access_role, - ) - success = False + return data + + # end method definition + + def process_bulk_datasource_pht(self, data_source: dict) -> Data: + """Load data from OpenText PHT data source into the data frame of the Data class (see helper/data.py) + + Args: + data_source (dict): Payload dict element for the data source + + Returns: + Data: Data class that includes a Pandas DataFrame + + Side Effects: + self._pht is set to the PHT object created by this method + """ + + # 1. Read and validate values from the data source payload: + pht_base_url = data_source.get("pht_base_url", "") + if not pht_base_url: + logger.error( + "PHT base URL (pht_base_url) is not specified in payload of bulk data source. Cannot load data!" + ) + return None + pht_username = data_source.get("pht_username", "") + if not pht_username: + logger.error( + "PHT username (pht_username) is not specified in payload of bulk data source. Cannot load data!" + ) + return None + pht_password = data_source.get("pht_password", "") + if not pht_password: + logger.error( + "PHT password (pht_password) is not specified in payload of bulk data source. Cannot load data!" + ) + return None - self.write_status_file( - success, section_name, self._additional_access_role_members + logger.info( + "Loading data from OpenText PHT -> %s (Product Hierarchy)...", + pht_base_url, ) - return success + # 2. Creating the PHT object: + self._pht = PHT( + base_url=pht_base_url, + username=pht_username, + password=pht_password, + ) - # end method definition + # 3. Authenticate at PHT + token = self._pht.authenticate() + if not token: + logger.error( + "Failed to authenticate at OpenText PHT -> %s", + pht_base_url, + ) + return None + else: + logger.info( + "Successfully authenticated at OpenText PHT -> %s", + pht_base_url, + ) - def process_renamings(self, section_name: str = "renamings") -> bool: - """Process renamings specified in payload and rename existing Extended ECM items. + # 4. 
Load the PHT products into the Data object (Pandas DataFrame):
+        if not self._pht.load_products():
+            logger.error("Failure during load of OpenText PHT products!")
+            return None
+        data = self._pht.get_data()
+        if not data:
+            logger.error("Failure during load of OpenText PHT product data!")
+            return None
+
+        return data
+
+    # end method definition
+
+    def process_bulk_datasource_excel(self, data_source: dict) -> Data:
+        """Load data from Excel files into the data frame of the Data class (see helper/data.py)

        Args:
-            section_name (str, optional): name of the section. It can be overridden
-                                          for cases where multiple sections of same type
-                                          are used (e.g. the "Post" sections)
-                                          This name is also used for the "success" status
-                                          files written to the Admin Personal Workspace
+            data_source (dict): Payload dict element for the data source
+
        Returns:
-            bool: True if payload has been processed without errors, False otherwise
+            Data: Data class that includes a Pandas DataFrame
        """

-        if not self._renamings:
-            logger.info("Payload section -> %s is empty. Skipping...", section_name)
-            return True
+        # 1. Read and validate values from the data source payload:
+        xlsx_files = data_source.get("xlsx_files", [])
+        if not xlsx_files:
+            logger.error(
+                "Excel files not specified in payload of bulk data source. Cannot load data!"
+            )
+            return None
+        xlsx_sheets = data_source.get("xlsx_sheets", 0)  # use 0 not None!
+        xlsx_columns = data_source.get("xlsx_columns", None)
+        xlsx_skip_rows = data_source.get("xlsx_skip_rows", 0)
+        xlsx_na_values = data_source.get("xlsx_na_values", None)
+
+        # 2. Initialize Data object:
+        data = Data()
+
+        # 3. Iterate over the Excel files and load them into the Data object:
+        for xlsx_file in xlsx_files:
+            if not data.load_excel_data(
+                xlsx_path=xlsx_file,
+                sheet_names=xlsx_sheets,
+                usecols=xlsx_columns,
+                skip_rows=xlsx_skip_rows,
+                na_values=xlsx_na_values,
+            ):
+                logger.error("Failed to load Excel file -> '%s'!", xlsx_file)
+                return None

-        # If this payload section has been processed successfully before we
-        # can return True and skip processing it once more:
-        if self.check_status_file(section_name):
-            return True
+        return data

-        success: bool = True
+    # end method definition

-        for renaming in self._renamings:
-            if not "nodeid" in renaming:
-                if not "volume" in renaming:
-                    logger.error(
-                        "Renamings require either a node ID or a volume! Skipping to next renaming..."
-                    )
-                    continue
-                # Determine object ID of volume:
-                volume = self._otcs.get_volume(renaming["volume"])
-                node_id = self._otcs.get_result_value(volume, "id")
-            else:
-                node_id = renaming["nodeid"]
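+    # Illustrative sketch of a bulk data source payload element of type "excel"
+    # as consumed by process_bulk_datasource_excel() above; the file path is a
+    # hypothetical placeholder:
+    #
+    # {
+    #     "name": "excel-source",
+    #     "type": "excel",
+    #     "xlsx_files": ["/data/excel/products.xlsx"],
+    #     "xlsx_sheets": 0,  # 0 = first sheet
+    #     "xlsx_skip_rows": 0,  # optional
+    # }
+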
+    def process_bulk_datasource_xml(self, data_source: dict) -> Data:
+        """Load data from XML files or directories or zip files into the data frame of the Data class (see helper/data.py)

-            # Check if element has been disabled in payload (enabled = false).
-            # In this case we skip the element:
-            if "enabled" in renaming and not renaming["enabled"]:
-                logger.info("Payload for Renaming is disabled. Skipping...")
-                continue

        Args:
+            data_source (dict): Payload dict element for the data source

-            response = self._otcs.rename_node(
-                int(node_id), renaming["name"], renaming["description"]
+        Returns:
+            Data: Data class that includes a Pandas DataFrame
+        """
+
+        # 1. Read and validate values from the data source payload:
+        xml_files = data_source.get("xml_files", [])
+        xml_directories = data_source.get(
+            "xml_directories", []
+        )  # can also be zip files
+        xml_xpath = data_source.get("xml_xpath", None)
+
+        # 2. Initialize Data object:
+        data = Data()
+
+        # 3. If no XML directories are specified we load the individual files listed in "xml_files":
+        if not xml_directories:
+            for xml_file in xml_files:
+                logger.info("Loading XML file -> '%s'...", xml_file)
+                if not data.load_xml_data(xml_path=xml_file, xpath=xml_xpath):
+                    logger.error("Failed to load XML file -> '%s'!", xml_file)
+                    return None
+
+        # 4. If XML directories or zip files of XML files are given we traverse them instead:
+        for xml_directory in xml_directories:
+            logger.info(
+                "Loading XML files from directory or ZIP file -> '%s'...", xml_directory
            )
+            # we now produce a list of dictionaries:
+            xml_data = XML.load_xml_files_from_directory(
+                path_to_root=xml_directory, filenames=xml_files, xpath=xml_xpath
+            )
+            if not xml_data:
                logger.error(
-                    "Failed to rename node ID -> %s to new name -> %s.",
-                    node_id,
-                    renaming["name"],
+                    "Failed to load XML files from directory or ZIP file -> '%s'!",
+                    xml_directory,
                )
-                success = False
+                return None

-        self.write_status_file(success, section_name, self._renamings)
+            data.append(add_data=xml_data)

-        return success
+        return data

-    # end method definition
+    # end method definition

-    def process_items(self, items: list, section_name: str = "items") -> bool:
-        """Process items specified in payload and create them in Extended ECM.
+    def process_bulk_datasource_json(self, data_source: dict) -> Data:
+        """Load data from JSON files into the data frame of the Data class (see helper/data.py)

        Args:
-            items (list): list of items to create (need this as parameter as we
-                          have multiple lists)
-            section_name (str, optional): name of the section. It can be overridden
-                                          for cases where multiple sections of same type
-                                          are used (e.g. the "Post" sections like "itemsPost")
-                                          This name is also used for the "success" status
-                                          files written to the Admin Personal Workspace
+            data_source (dict): Payload dict element for the data source
+
        Returns:
-            bool: True if payload has been processed without errors, False otherwise
+            Data: Data class that includes a Pandas DataFrame
        """

-        if not items:
-            logger.info("Payload section -> %s is empty. Skipping...", section_name)
+        # 1. Read and validate values from the data source payload:
+        json_files = data_source.get("json_files", [])
+        if not json_files:
+            logger.error(
+                "JSON files not specified in payload of bulk data source. Cannot load data!"
+            )
-            return True
+            return None

-        # If this payload section has been processed successfully before we
-        # can return True and skip processing it once more:
-        if self.check_status_file(section_name):
-            return True
+        # 2. Initialize Data object:
+        data = Data()

-        success: bool = True
+        # 3. Iterate JSON files and load data into Data object:
+        for json_file in json_files:
+            if not data.load_json_data(json_path=json_file):
+                logger.error("Invalid JSON file -> '%s'. Cannot load it!", json_file)
+                return None

-        for item in items:
-            if not "name" in item:
-                logger.error("Item needs a name. Skipping...")
-                continue
-            item_name = item["name"]
+        return data

-            # Check if element has been disabled in payload (enabled = false). 
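+    # Illustrative sketch of a bulk data source payload element of type "json";
+    # the file path is a hypothetical placeholder:
+    #
+    # {
+    #     "name": "json-source",
+    #     "type": "json",
+    #     "json_files": ["/data/json/customers.json"],
+    # }
+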
-            # In this case we skip the element:
-            if "enabled" in item and not item["enabled"]:
+    def process_bulk_datasource_csv(self, data_source: dict) -> Data:
+        """Load data from CSV files (Comma-separated values) into the data frame of the Data class (see helper/data.py)
+
+        Args:
+            data_source (dict): Payload dict element for the data source
+
+        Returns:
+            Data: Data class that includes a Pandas DataFrame
+        """
+
+        # 1. Read and validate values from the data source payload:
+        csv_files = data_source.get("csv_files", [])
+        if not csv_files:
+            logger.error(
+                "CSV files not specified in payload of bulk data source. Cannot load data!"
+            )
+            return None
+
+        # 2. Initialize Data object:
+        data = Data()
+
+        # 3. Iterate over the CSV files and load data into Data object:
+        for csv_file in csv_files:
+            if not data.load_csv_data(csv_path=csv_file):
+                logger.error("Failed to load CSV file -> '%s'!", csv_file)
+                return None
+
+        return data
+
+    # end method definition
+
+    def process_bulk_datasource(
+        self,
+        data_source_name: str,
+        force_reload: bool = True,
+    ) -> Data | None:
+        """Process a datasource that is given by a payload element, parse its
+        properties and deliver a 'Data' object which is a wrapper for
+        a Pandas DataFrame.
+
+        Args:
+            data_source_name (str): data source name
+            force_reload (bool): Force a reload of the data source if True
+
+        Returns:
+            Data | None: object of type Data
+        """
+
+        if not data_source_name:
+            logger.error("Missing data source name!")
+            return None
+
+        self._log_header_callback(
+            text="Process Bulk Data Source -> '{}'".format(data_source_name), char="-"
+        )
+
+        logger.info(
+            "Found specified data source name -> '%s'. Lookup the data source payload...",
+            data_source_name,
+        )
+        data_source = next(
+            (
+                item
+                for item in self._bulk_datasources
+                if item["name"] == data_source_name
+            ),
+            None,
+        )
+        if not data_source:  # does this datasource not exist?
+            logger.error(
+                "Cannot find specified data source -> '%s' in payload!",
+                data_source_name,
+            )
+            return None
+
+        # Check if data has already been loaded for the datasource:
+        if "data" in data_source and not force_reload:
+            logger.info(
+                "Data for datasource -> '%s' is already loaded and reload is not enforced. Return existing data...",
+                data_source_name,
+            )
+            return data_source["data"]
+        else:
+            if force_reload:
                logger.info(
+                    "Reload of data from datasource -> '%s' is enforced. Building data...",
+                    data_source_name,
                )
-            continue
-
-        if not "description" in item:
-            item_description = ""
             else:
-            item_description = item["description"]
+                logger.info(
+                    "Data for datasource -> '%s' is not yet available. Building data...",
+                    data_source_name,
+                )

-            parent_nickname = item.get("parent_nickname")
-            parent_path = item.get("parent_path")
+        data_source_type = data_source.get("type", None)
+        if not data_source_type:
+            logger.error(
+                "Data source needs a type parameter. This is new - you may need to add it to your bulkDataSource payload definition file! Cannot load data."
+            )
+            return None

-        if parent_nickname:
-            parent_node = self._otcs.get_node_from_nickname(parent_nickname)
-            parent_id = self._otcs.get_result_value(parent_node, "id")
-            # if not parent_node:
-            if not parent_id:
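+        # Dispatch on the "type" value of the data source payload element to the
+        # matching loader method below. A minimal payload element therefore
+        # always needs at least a "name" and a "type", e.g. (illustrative
+        # sketch with hypothetical values):
+        #
+        # {
+        #     "name": "xml-source",
+        #     "type": "xml",
+        #     "xml_files": ["/data/xml/transport.xml"],
+        # }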
+        match data_source_type:
+            case "excel":
+                data = self.process_bulk_datasource_excel(data_source=data_source)
+                if not data:
+                    logger.error("Failure during load of Excel data source!")
+                    return None
+            case "servicenow":
+                data = self.process_bulk_datasource_servicenow(data_source=data_source)
+                if not data:
+                    logger.error("Failure during load of ServiceNow articles!")
+                    return None
+            case "otmm":
+                data = self.process_bulk_datasource_otmm(data_source=data_source)
+                if not data:
                    logger.error(
-                    "Item -> %s has a parent nickname -> %s that does not exist. Skipping...",
-                    item_name,
-                    parent_nickname,
+                        "Failure during load of OpenText Media Management assets!"
                    )
-                success = False
-                continue
-        else:  # use parent_path and Enterprise Volume
-            parent_node = self._otcs.get_node_by_volume_and_path(141, parent_path)
-            parent_id = self._otcs.get_result_value(parent_node, "id")
-            if not parent_id:
-                # if not parent_node:
+                    return None
+            case "otcs":
+                data = self.process_bulk_datasource_otcs(data_source=data_source)
+                if not data:
                    logger.error(
-                    "Item -> %s has a parent path that does not exist. Skipping...",
-                    item_name,
+                        "Failure during load of OpenText Content Server items!"
                    )
-                success = False
-                continue
-
-        original_nickname = item.get("original_nickname")
-        original_path = item.get("original_path")
-
-        if original_nickname:
-            original_node = self._otcs.get_node_from_nickname(original_nickname)
-            original_id = self._otcs.get_result_value(original_node, "id")
-            if not original_id:
-                # if not original_node:
+                    return None
+            case "pht":
+                data = self.process_bulk_datasource_pht(data_source=data_source)
+                if not data:
                    logger.error(
-                    "Item -> %s has a original nickname -> %s that does not exist. Skipping...",
-                    item_name,
-                    original_nickname,
+                        "Failure during load of OpenText Product Hierarchy (PHT)!"
                    )
-                success = False
-                continue
-        elif original_path:
-            original_node = self._otcs.get_node_by_volume_and_path(
-                141, original_path
+                    return None
+            case "json":
+                data = self.process_bulk_datasource_json(data_source=data_source)
+                if not data:
+                    logger.error("Failure during load of JSON data source!")
+                    return None
+            case "xml":
+                data = self.process_bulk_datasource_xml(data_source=data_source)
+                if not data:
+                    logger.error("Failure during load of XML data source!")
+                    return None
+            case "csv":
+                data = self.process_bulk_datasource_csv(data_source=data_source)
+                if not data:
+                    logger.error("Failure during load of CSV data source!")
+                    return None
+            case _:
+                logger.error(
+                    "Illegal data source type. Types supported: 'excel', 'servicenow', 'otmm', 'otcs', 'pht', 'json', 'xml', 'csv'"
                )
-            original_id = self._otcs.get_result_value(original_node, "id")
-            if not original_id:
-                # if not original_node:
-                logger.error(
-                    "Item -> %s has a original path that does not exist. Skipping...",
-                    item_name,
-                )
-                success = False
-                continue
-        else:
-            original_id = 0
+                return None

-        if not "type" in item:
-            logger.error("Item -> %s needs a type. 
Skipping...", item_name) - success = False + logger.info( + "Data Frame for source -> '%s' has %s row(s) and %s column(s) after data loading.", + data_source_name, + data.get_data_frame().shape[0], + data.get_data_frame().shape[1], + ) + + cleansings = data_source.get("cleansings", {}) + columns_to_drop = data_source.get("columns_to_drop", []) + columns_to_keep = data_source.get("columns_to_keep", []) + columns_to_add = data_source.get("columns_to_add", []) + conditions = data_source.get("conditions", []) + explosions = data_source.get("explosions", []) + + # Add columns if specified in data_source: + for add_column in columns_to_add: + if not "source_column" in add_column or not "name" in add_column: + logger.error( + "Add columns is missing name or source column. Column will not be added." + ) continue + data.add_column( + source_column=add_column["source_column"], + new_column=add_column["name"], + reg_exp=add_column.get("reg_exp", None), + prefix=add_column.get("prefix", ""), + suffix=add_column.get("suffix", ""), + length=add_column.get("length", None), + group_chars=add_column.get("group_chars", None), + group_separator=add_column.get("group_separator", "."), + ) - item_type = item.get("type") - item_url = item.get("url") + # Drop columns if specified in data_source: + if columns_to_drop: + data.drop_columns(columns_to_drop) - # check that we have the required information - # for the given item type: - match item_type: - case 140: # URL - if item_url == "": - logger.error( - "Item -> %s has type URL but the URL is not in the payload. Skipping...", - item_name, - ) - success = False - continue - case 1: # Shortcut - if original_id == 0: - logger.error( - "Item -> %s has type Shortcut but the original item is not in the payload. Skipping...", - item_name, - ) - success = False - continue + # Keep only selected columns if specified in data_source: + if columns_to_keep: + data.keep_columns(columns_to_keep) - # Check if an item with the same name does already exist. - # This can also be the case if the python container runs a 2nd time. - # For this reason we are also not issuing an error but just an info (False): - response = self._otcs.get_node_by_parent_and_name( - int(parent_id), item_name, show_error=False + # cleanup data if specified in data_source + if cleansings: + logger.info("Start cleansing for data source -> '%s'...", data_source_name) + data.cleanse(cleansings=cleansings) + logger.info( + "Cleansing for data source -> '%s' completed...", data_source_name ) - if self._otcs.get_result_value(response, "name") == item_name: - logger.info( - "Item with name -> %s does already exist in parent folder with ID -> %s", - item_name, - parent_id, - ) + + # Check if fields with list substructures should be exploded: + for explosion in explosions: + # explode field can be a string or a list + # exploding multiple fields at once avoids + # combinatorial explosions - this is VERY + # different from exploding columns one after the other! + if not "explode_field" in explosion: + logger.error("Missing explosion field(s)!") continue - response = self._otcs.create_item( - int(parent_id), - str(item_type), - item_name, - item_description, - item_url, - int(original_id), + explode_field = explosion["explode_field"] + flatten_fields = explosion.get("flatten_fields", []) + split_string_to_list = explosion.get("split_string_to_list", False) + logger.info( + "Starting explosion of data source '%s' by field(s) -> '%s' (type -> '%s'). 
Size of data set before explosion -> %s",
+                data_source_name,
+                str(explode_field),
+                type(explode_field),
+                str(len(data)),
+            )
+            data.explode_and_flatten(
+                explode_field=explode_field,
+                flatten_fields=flatten_fields,
+                make_unique=False,
+                split_string_to_list=split_string_to_list,
+            )
+            logger.info("Size of data set after explosion -> %s", str(len(data)))
+
+        # Keep only selected rows if conditions are specified in data_source
+        # We have this after "explosions" to allow access to subfields as well:
+        if conditions:
+            data.filter(conditions=conditions)
+
+        # Keep the Data Frame for later lookups:
+        data_source["data"] = data
+
+        self._log_header_callback(
+            text="Completed Bulk Data Source -> '{}'".format(data_source_name), char="-"
+        )
+
+        return data
+
+    # end method definition
+
+    def process_bulk_workspaces(self, section_name: str = "bulkWorkspaces") -> bool:
+        """Process workspaces in payload and bulk create them in Extended ECM (multi-threaded).

        Args:
            section_name (str, optional): name of the section. It can be overridden
                                          for cases where multiple sections of same type
                                          are used (e.g. the "Post" sections like "workspacesPost")
                                          This name is also used for the "success" status
                                          files written to the Admin Personal Workspace
        Returns:
            bool: True if payload has been processed without errors, False otherwise
        """

+        if not self._bulk_workspaces:
+            logger.info("Payload section -> '%s' is empty. Skipping...", section_name)
+            return True
+
+        if not pandas_installed:
+            logger.info("Python module 'Pandas' not installed. Skipping...")
+            return False
+
+        # If this payload section has been processed successfully before we
+        # can return True and skip processing it once more:
+        if self.check_status_file(payload_section_name=section_name):
+            return True
+
+        # For efficient idempotent operation we may want to see which workspaces
+        # have already been processed before:
+        if self.check_status_file(
+            payload_section_name=section_name, payload_specific=True, prefix="failure_"
+        ):
+            # Read payload from where we left it last time
+            self._bulk_workspaces = self.get_status_file(
+                payload_section_name=section_name, prefix="failure_"
+            )
+            if not self._bulk_workspaces:
+                logger.error(
+                    "Cannot load existing bulkWorkspaces failure file. Bailing out!"
+                )
+                return False
+
+        success: bool = True
+
+        for bulk_workspace in self._bulk_workspaces:
+            # Check if element has been disabled in payload (enabled = false). 
+ # In this case we skip the element: + if "enabled" in bulk_workspace and not bulk_workspace["enabled"]: + logger.info("Payload for Bulk Workspace is disabled. Skipping...") + continue + + # Read Type Name from payload: + if not "type_name" in bulk_workspace: logger.error( - "Item to change permission is not specified (needs path, volume, or nickname). Skipping..." + "Bulk Workspace needs a type name! Skipping to next workspace..." ) success = False continue + type_name = bulk_workspace["type_name"] - # Check if element has been disabled in payload (enabled = false). - # In this case we skip the element: - if "enabled" in permission and not permission["enabled"]: - logger.info("Payload for Permission is disabled. Skipping...") + # The payload element must have a "data_source" key: + if not "data_source" in bulk_workspace: + logger.error("No data source specified in Bulk Workspace!") + success = False continue + data_source_name = bulk_workspace["data_source"] - node_id = 0 + self._log_header_callback( + text="Process Bulk Workspaces for -> '{}' using data source -> '{}'".format( + type_name, data_source_name + ), + char="-", + ) - # Check if "volume" is in payload and not empty string - # we try to get the node ID from the volume type: - if "volume" in permission and permission["volume"]: - volume_type = permission["volume"] + force_reload = bulk_workspace.get("force_reload", True) + + # Load and prepare the data source for the bulk processing: + data = self.process_bulk_datasource( + data_source_name=data_source_name, force_reload=force_reload + ) + if not data: + logger.error( + "Failed to load data source for bulk workspace type -> '%s'", + type_name, + ) + continue + + # Check if duplicate lines for given fields should be removed: + if "unique" in bulk_workspace and bulk_workspace["unique"]: + unique_fields = bulk_workspace["unique"] logger.info( - "Found volume type -> %s in permission definition. Determine volume ID...", - volume_type, + "Starting deduplication of data set for workspace type -> '%s' with unique fields -> %s. Size of data set before deduplication -> %s", + type_name, + str(unique_fields), + str(len(data)), ) - node = self._otcs.get_volume(volume_type) - node_id = self._otcs.get_result_value(node, "id") - if not node_id: + data.deduplicate(unique_fields=unique_fields, inplace=True) + logger.info( + "Size of data set after deduplication -> %s", str(len(data)) + ) + + # Sort the data set if "sort" specified in payload. We may want to do this to have a + # higher chance that rows with workspace names that may collapse into + # one name are put into the same partition. This can avoid race conditions + # between different Python threads. + if "sort" in bulk_workspace and bulk_workspace["sort"]: + sort_fields = bulk_workspace["sort"] + logger.info( + "Start sorting of data set for workspace type -> '%s' based on fields (columns) -> %s...", + type_name, + str(sort_fields), + ) + data.sort(sort_fields=sort_fields, inplace=True) + logger.info( + "Sorting of data set for workspace type -> '%s' based on fields (columns) -> '%s' completed!", + type_name, + str(sort_fields), + ) + + # Read name field from payload: + if not "name" in bulk_workspace or not bulk_workspace["name"]: + logger.error( + "Bulk Workspace needs a name field! Skipping to next workspace..." 
+ ) + success = False + continue + workspace_name_field = bulk_workspace["name"] + + # Read optional description field from payload: + if not "description" in bulk_workspace: + workspace_description_field = None + else: + workspace_description_field = bulk_workspace["description"] + + # Find the workspace type with the name given in the payload: + workspace_type = next( + (item for item in self._workspace_types if item["name"] == type_name), + None, + ) + if workspace_type is None: + logger.error( + "Workspace Type -> '%s' not found. Skipping to next bulk workspace...", + type_name, + ) + success = False + continue + if workspace_type["templates"] == []: + logger.error( + "Workspace Type -> '%s' does not have templates. Skipping to next bulk workspace...", + type_name, + ) + success = False + continue + + # check if the template to be used is specified in the payload: + if "template_name" in bulk_workspace: + template_name = bulk_workspace["template_name"] + workspace_template = next( + ( + item + for item in workspace_type["templates"] + if item["name"] == template_name + ), + None, + ) + if workspace_template: # does this template exist? + logger.info( + "Workspace Template -> '%s' has been specified in payload and it does exist.", + template_name, + ) + else: logger.error( - "Illegal volume -> %s in permission specification. Skipping...", - volume_type, + "Workspace Template -> '%s' has been specified in payload but it doesn't exist!", + template_name, + ) + logger.error( + "Workspace Type -> '%s' has only these templates -> %s", + type_name, + workspace_type["templates"], ) success = False continue + # template to be used is NOT specified in the payload - then we just take the first one: else: - # the following path block requires - # a value for the volume - if it is - # not specified we take the Enterprise Workspace (141): - volume_type = 141 - - # Check if "path" is in payload and not empty list - # (path can be combined with volume so we need to take volume into account): - if "path" in permission and permission["path"]: - path = permission["path"] + workspace_template = workspace_type["templates"][0] logger.info( - "Found path -> %s in permission definition. Determine node ID...", - path, + "Workspace Template has not been specified in payload - we just take the first one (%s)", + workspace_template, ) - node = self._otcs.get_node_by_volume_and_path(volume_type, path) - node_id = self._otcs.get_result_value(node, "id") - if not node_id: - logger.error("Path -> %s does not exist. Skipping...", path) - success = False - continue - # Check if "nickname" is in payload and not empty string: - if "nickname" in permission and permission["nickname"]: - nickname = permission["nickname"] + template_id = workspace_template["id"] + template_name = workspace_template["name"] + workspace_type_id = workspace_type["id"] + + if not "categories" in bulk_workspace: logger.info( - "Found nickname -> %s in permission definition. Determine node ID...", - nickname, + "Bulk workspace payload has no category data! Will leave category attributes empty..." ) - node = self._otcs.get_node_from_nickname(nickname) - node_id = self._otcs.get_result_value(node, "id") - if not node_id: - logger.error("Nickname -> {} does not exist. Skipping...") + categories = None + else: + categories = bulk_workspace["categories"] + + # Should existing workspaces be updated? No is the default. 
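+            # This is controlled per payload element by the "enforce_updates"
+            # switch read below. For orientation, a bulk workspace payload
+            # element may look like this (illustrative sketch only; type,
+            # data source, and field names are hypothetical):
+            #
+            # {
+            #     "enabled": True,
+            #     "type_name": "Customer",
+            #     "data_source": "excel-source",
+            #     "name": "customer_name",  # name field in the data source
+            #     "unique": ["customer_name"],
+            #     "enforce_updates": False,
+            # }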
+            enforce_updates = bulk_workspace.get("enforce_updates", False)
+
+            logger.info(
+                "Bulk create Workspaces (name field -> %s, type -> '%s') from workspace template -> '%s' (%s). Enforce Updates -> %s.",
+                workspace_name_field,
+                type_name,
+                template_name,
+                template_id,
+                str(enforce_updates),
+            )
+
+            # see if bulkWorkspace definition has a specific thread number
+            # otherwise it is read from a global environment variable
+            bulk_thread_number = int(
+                bulk_workspace.get("thread_number", BULK_THREAD_NUMBER)
+            )
+
+            partitions = data.partitionate(bulk_thread_number)
+
+            # Create a list to hold the threads
+            threads = []
+            results = []
+
+            # Create and start a thread for each partition
+            for index, partition in enumerate(partitions, start=1):
+                thread = threading.Thread(
+                    name=f"{section_name}_{index:02}",
+                    target=self.thread_wrapper,
+                    args=(
+                        self.process_bulk_workspaces_worker,
+                        bulk_workspace,
+                        partition,
+                        template_id,
+                        workspace_type_id,
+                        workspace_name_field,
+                        workspace_description_field,
+                        categories,
+                        enforce_updates,
+                        results,
+                    ),
+                )
+                # start a thread executing the process_bulk_workspaces_worker() method below:
+                logger.info("Starting Thread -> %s...", str(thread.name))
+                thread.start()
+                threads.append(thread)
+
+            # Wait for all threads to complete
+            for thread in threads:
+                logger.info("Waiting for Thread -> %s to complete...", str(thread.name))
+                thread.join()
+                logger.info("Thread -> %s has completed.", str(thread.name))
+
+            if not "workspaces" in bulk_workspace:
+                bulk_workspace["workspaces"] = {}
+            for result in results:
+                if not result["success"]:
+                    logger.info(
+                        "Thread -> %s completed with %s failed, %s skipped, and %s created %s workspaces.",
+                        str(result["thread_id"]),
+                        str(result["failure_counter"]),
+                        str(result["skipped_counter"]),
+                        str(result["success_counter"]),
+                        type_name,
+                    )
                    success = False
                else:
+                    logger.info(
+                        "Thread -> %s completed successfully with %s skipped, and %s created %s workspaces.",
+                        str(result["thread_id"]),
+                        str(result["skipped_counter"]),
+                        str(result["success_counter"]),
+                        type_name,
+                    )
+                # Record all generated workspaces. This should allow us
+                # to restart in case of failures and avoid trying to
+                # create workspaces that have been created before
+                bulk_workspace["workspaces"].update(result["workspaces"])

+        self.write_status_file(success, section_name, self._bulk_workspaces)

+        return success

+    # end method definition

+    def process_bulk_categories(
+        self, row: pd.Series, index: str, categories: list, replacements: list
+    ) -> list:
+        """Helper method to replace the value placeholders in the bulk category structures
+        with values from the Pandas Series (row)
+
+        Args:
+            row (pd.Series): current row
+            index (str): the index of the Pandas Data Frame. Just used here for logging.
+            categories (list): list of payload categories
+            replacements (list): list of replacements
+        Returns:
+            list: category payload with resolved values
+        """
+
+        # Make sure the threads are not changing data structures that
+        # are shared between threads. categories is a list of dicts.
+        # list and dicts are "mutable" data structures in Python!
+        worker_categories = copy.deepcopy(categories)
+
+        # this loop generates "value" for each
+        # "value_field". "value_field" may also contain lists
+        # that are either delimited by [...] 
or by a "value_type" with value "list" + for category_item in worker_categories: + if not "attribute" in category_item: + logger.error( + "Category item -> %s is missing the attribute field!", + category_item, + ) + continue + + # per default the value is in the "value" item: + value = category_item.get("value", None) + + # is there a value replacement for the current attribute? + if "value_field" in category_item: + value_field = category_item["value_field"] + if "row" in category_item: + set_index = int(category_item["row"]) - 1 + else: + # We set this to None and not 0 + # to handle cases where we have multi-value + # attributes that take a list as parameter + set_index = None + + # this method always returns a string even if the value is + # actually a list. + value = self.replace_bulk_placeholders( + input_string=value_field, + row=row, + index=set_index, + replacements=replacements, + ) + else: + value_field = None + + # if we don't have a value now, then there's an issue: + if value is None: + logger.error( + "Category item needs either a value or value_field! Skipping attribute -> '%s'", + category_item["attribute"], + ) continue - node_name = self._otcs.get_result_value(node, "name") + # Handle this special case where we get a string that actually represents a list. + # Convert it back to a real list: + is_list = False + if category_item.get("value_type", "string") == "list": + # if it is explicitly declared + logger.debug( + "Value -> %s is declared in payload to be a list (items separated by comma or semicolon)", + value, + ) + is_list = True + # also values not declared as lists may include lists indicated by [...] + # also if value_type == "list" we double-check that no [...] are around the values: + if value.startswith("[") and value.endswith("]"): + # Remove the square brackets and declare it is a list! + logger.debug( + "Value string -> %s has [...] - remove brackets...", + value, + ) + value = value.strip("[]") + is_list = True + if is_list: + # we split the string at commas or semicolons: + list_splitter = category_item.get("list_splitter", ";,") + logger.debug( + "Split value string -> %s after these characters -> '%s'", + value, + list_splitter, + ) + + # Escape the split characters to ensure they are treated literally in the regex pattern + escaped_splitter = re.escape(list_splitter) + + # Construct the regex pattern dynamically + pattern = rf"[{escaped_splitter}]\s*" + + # Split the value string at the defined splitter characters: + elements = re.split(pattern, value) + + # Remove the quotes around each element + elements = [element.strip("'") for element in elements] + value = elements + logger.debug( + "Found list for a multi-value category attribute -> '%s' from field -> '%s' in data row -> %s. Value -> %s", + category_item["attribute"], + value_field, + index, + str(value), + ) + # now we check if there's a data lookup configured in the payload: + lookup_data_source = category_item.get("lookup_data_source", None) + if lookup_data_source: + logger.info( + "Found lookup data source -> '%s' for attribute -> '%s'. 
Processing...", + lookup_data_source, + category_item["attribute"], + ) + if not isinstance(value, list): + # value is a single value and not a list: + (_, synonym) = self.process_bulk_workspaces_synonym_lookup( + data_source_name=lookup_data_source, + workspace_name_synonym=value, + ) + if synonym: + logger.info( + "Found synonym -> '%s' for attribute -> '%s' and value -> '%s' in data source -> '%s'", + synonym, + category_item["attribute"], + value, + lookup_data_source, + ) + value = synonym + else: + logger.warning( + "Cannot lookup the value for attribute -> '%s' and value -> '%s' in data source -> '%s'. Keep existing value.", + category_item["attribute"], + value, + lookup_data_source, + ) + else: + # value is a list - so we apply the lookup to each item: + for i, s in enumerate(value): + (_, synonym) = self.process_bulk_workspaces_synonym_lookup( + data_source_name=lookup_data_source, + workspace_name_synonym=s, + workspace_type=None, # we don't need the workspace ID, just the workspace name + ) + if synonym: + logger.info( + "Found synonym -> '%s' for attribute -> '%s' and value -> '%s' in data source -> '%s'", + synonym, + category_item["attribute"], + value[i], + lookup_data_source, + ) + value[i] = synonym + else: + logger.warning( + "Cannot lookup the value for attribute -> '%s' and value -> '%s' in data source -> '%s'. Keep existing value.", + category_item["attribute"], + value[i], + lookup_data_source, + ) + if value_field: + logger.debug( + "Reading category attribute -> '%s' from field -> '%s' in data row -> %s. Value -> %s", + category_item["attribute"], + value_field, + index, + str(value), + ) + else: + logger.debug( + "Setting category attribute -> '%s' to value -> %s", + category_item["attribute"], + str(value), + ) + category_item["value"] = value + # end for category_item... + + # cleanup categories_payload to remove empty rows of sets: + rows_to_remove = {} + for attribute in worker_categories: + if attribute.get("row") is not None: + set_name = attribute["set"] + row_number = attribute["row"] + value = attribute["value"] + + # If value is empty, track it for removal + if not value: # Treat empty strings or None as empty + if (set_name, row_number) not in rows_to_remove: + rows_to_remove[(set_name, row_number)] = True + else: + # If any value in the row is not empty, mark the row as not removable + rows_to_remove[(set_name, row_number)] = False + logger.debug("Empty Rows to remove from sets: %s", rows_to_remove) + cleaned_categories = [ + item + for item in worker_categories + if "set" not in item + or "row" not in item + or not rows_to_remove.get((item["set"], item["row"]), False) + ] + + return cleaned_categories + + # end method definition + + def process_bulk_workspaces_worker( + self, + bulk_workspace: dict, + partition: pd.DataFrame, + template_id: int, + workspace_type_id: int, + workspace_name_field: str, + workspace_description_field: str, + categories: list | None = None, + enforce_updates: bool = False, + results: list | None = None, + ): + """This is the thread worker to create workspaces in bulk. + Each worker thread gets a partition of the rows that include + the data required for the workspace creation. 
+
+        Args:
+            bulk_workspace (dict): the bulkWorkspaces payload element
+            partition (pd.DataFrame): Data partition with rows to process
+            template_id (int): ID of the workspace template to use
+            workspace_type_id (int): ID of the workspace type
+            workspace_name_field (str): Field where the workspace name is stored
+            workspace_description_field (str): Field where the workspace description is stored
+            categories (list): list of category dictionaries
+            enforce_updates (bool): should existing workspaces be updated with new metadata?
+            results (list): mutable list of thread results
+        """
+
+        thread_id = threading.get_ident()
+
+        logger.info(
+            "Start working on data set partition of size -> %s to bulk create workspaces...",
+            str(len(partition)),
+        )
+
+        result = {}
+        result["thread_id"] = thread_id
+        result["success_counter"] = 0
+        result["failure_counter"] = 0
+        result["skipped_counter"] = 0
+        result["workspaces"] = {}
+        result["success"] = True
+
+        # Check if workspaces have been processed before, i.e. testing
+        # if a "workspaces" key exists and if it is pointing to a non-empty list.
+        # Additionally we check that workspace updates are not enforced:
+        if bulk_workspace.get("workspaces", None) and not enforce_updates:
+            existing_workspaces = bulk_workspace["workspaces"]
             logger.info(
-                "Found node -> %s with ID -> %s to apply permission to.",
-                node_name,
-                node_id,
+                "Found %s already processed workspaces. Try to complete the job...",
+                str(len(existing_workspaces)),
             )
-            # write node information back into payload
-            # for better debugging
-            permission["node_name"] = node_name
-            permission["node_id"] = node_id
+        else:
+            existing_workspaces = {}

-            if "apply_to" in permission:
-                apply_to = permission["apply_to"]
-            else:
-                apply_to = 2  # make item + sub-items the default
+        # See if external creation and modification dates are in the data:
+        external_modify_date_field = bulk_workspace.get("external_modify_date", None)
+        external_create_date_field = bulk_workspace.get("external_create_date", None)

-            # 1. Process Owner Permissions (list canbe empty!)
-            if "owner_permissions" in permission:
-                owner_permissions = permission["owner_permissions"]
+        # See if we have a key field to uniquely identify an existing workspace:
+        key_field = bulk_workspace.get("key", None)
+
+        # Get the dictionary of replacements for bulk workspace creation.
+        # It will be used in all places where data is read from the
+        # data frame. Each dictionary item has the field name as the
+        # dictionary key and a list of regular expressions as dictionary value.
+        replacements = bulk_workspace.get("replacements", None)
+
+        # Fetch the nickname field from the payload (if it is specified):
+        nickname_field = bulk_workspace.get("nickname", None)
+
+        # Nicknames are very limited in terms of allowed characters.
+        # For nicknames we need an additional regexp as we need to
+        # replace all non-alphanumeric, non-space characters with "".
+        # We also preserve hyphens in the first step to replace
+        # them below with underscores. This is important to avoid
+        # that different spellings of names produce different nicknames.
+        # We want spellings with spaces to match spellings with hyphens.
+        # For this the workspace names have a regexp "-| " in the payload.
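+        # Worked example (illustrative): with the regexp below, spellings like
+        # "Deep-Sea Drilling!" and "Deep Sea Drilling" are first reduced to
+        # "Deep-Sea Drilling" / "Deep Sea Drilling"; the code further below then
+        # maps spaces and hyphens to underscores and lowercases the result, so
+        # both spellings end up with the same nickname -> "deep_sea_drilling".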
+        nickname_additional_regex_list = [r"[^\w\s-]"]
+
+        # Process all datasets in the partition that was given to the thread:
+        for index, row in partition.iterrows():
+
+            # clear variables to ensure a clean state
+            workspace_id = None
+
+            logger.info(
+                "Processing data row -> %s for bulk workspace creation...",
+                str(index),
+            )
+
+            # Determine the workspace name:
+            workspace_name = self.replace_bulk_placeholders(
+                input_string=workspace_name_field,
+                row=row,
+                replacements=replacements,
+            )
+            if not workspace_name:
+                logger.warning(
+                    "Row -> %s does not have the required data to resolve -> %s for the workspace name!",
+                    str(index),
+                    workspace_name_field,
+                )
+                result["skipped_counter"] += 1
+                continue
+            # Workspace names for sure are not allowed to have ":":
+            workspace_name = workspace_name.replace(":", "")
+            # Truncate the workspace name to 254 characters which is the maximum allowed length in Extended ECM
+            if len(workspace_name) > 254:
+                workspace_name = workspace_name[:254]
+
+            # Check if workspace has been created before (either in this run
+            # or in a former run of the customizer):
+            if (
+                workspace_name in existing_workspaces  # processed in former run?
+                or workspace_name in result["workspaces"]  # processed in current run?
+            ):
                 logger.info(
-                    "Update owner permissions for item -> %s (%s) to -> %s",
-                    node_name,
-                    str(node_id),
-                    str(owner_permissions),
+                    "Workspace -> '%s' does already exist. Skipping...",
+                    workspace_name,
                 )
-                response = self._otcs.assign_permission(
-                    int(node_id), "owner", 0, owner_permissions, apply_to
+                result["skipped_counter"] += 1
+                continue
+
+            # Determine the description field:
+            if workspace_description_field:
+                description = self.replace_bulk_placeholders(
+                    input_string=workspace_description_field, row=row
                 )
-                if not response:
-                    logger.error(
-                        "Failed to update owner permissions for item -> %s (%s).",
-                        node_name,
-                        str(node_id),
+                # Truncate the workspace description to 254 characters which is the maximum allowed length in Extended ECM
+                # if len(description) > 254:
+                #     description = description[:254]
+            else:
+                description = ""
+
+            # Check if all data conditions to create the workspace are met
+            conditions = bulk_workspace.get("conditions", None)
+            if conditions:
+                evaluated_condition = self.evaluate_conditions(
+                    conditions=conditions, row=row, replacements=replacements
+                )
+                if not evaluated_condition:
+                    logger.info(
+                        "Condition for row -> %s not met. Skipping row for workspace creation",
+                        str(index),
                     )
-                    success = False
+                    result["skipped_counter"] += 1
+                    continue

-            # 2. 
Process Owner Group Permissions - if "owner_group_permissions" in permission: - owner_group_permissions = permission["owner_group_permissions"] - logger.info( - "Update owner group permissions for item -> %s (%s) to -> %s", - node_name, - str(node_id), - str(owner_group_permissions), + # Determine the external modification field (if any): + if external_modify_date_field: + external_modify_date = self.replace_bulk_placeholders( + input_string=external_modify_date_field, row=row ) - response = self._otcs.assign_permission( - int(node_id), "group", 0, owner_group_permissions, apply_to + else: + external_modify_date = None + + # Determine the external creation field (if any): + if external_create_date_field: + external_create_date = self.replace_bulk_placeholders( + input_string=external_create_date_field, row=row ) - if not response: - logger.error( - "Failed to update group permissions for item -> %s (%s).", - node_name, - str(node_id), + else: + external_create_date = None + + # Determine the key field (if any): + if key_field: + key = self.replace_bulk_placeholders(input_string=key_field, row=row) + else: + key = None + + # check if workspace with this nickname does already exist. + # we also store the nickname to assign it to the new workspace: + if nickname_field: + nickname = self.replace_bulk_placeholders( + input_string=nickname_field, + row=row, + replacements=replacements, + additional_regex_list=nickname_additional_regex_list, + ) + # Nicknames for sure are not allowed to include spaces: + nickname = nickname.replace(" ", "_") + # We also want to replace hyphens with underscores + # to make sure that workspace name spellings with + # spaces and with hyphens are mapped to the same + # workspace nicknames (aligned with the workspace names + # that have a regexp rule for this in the payload): + nickname = nickname.replace("-", "_") + nickname = nickname.lower() + response = self._otcs_frontend.get_node_from_nickname(nickname=nickname) + if response: + found_workspace_name = self._otcs_frontend.get_result_value( + response, "name" ) - success = False + if found_workspace_name != workspace_name: + logger.warning( + "Clash of nicknames for -> '%s' and -> '%s'!", + workspace_name, + found_workspace_name, + ) + else: + # Only skip if workspace update is not enforced: + if not enforce_updates: + logger.info( + "Workspace -> '%s' with nickname -> '%s' does already exist (found -> %s). Skipping...", + workspace_name, + nickname, + found_workspace_name, + ) + result["skipped_counter"] += 1 + continue + else: + nickname = None - # 3. Process Public Permissions - if "public_permissions" in permission: - public_permissions = permission["public_permissions"] - logger.info( - "Update public permissions for item -> %s (%s) to -> %s", - node_name, - str(node_id), - str(public_permissions), - ) - response = self._otcs.assign_permission( - int(node_id), "public", 0, public_permissions, apply_to + # Check if workspace does already exist: + response = self._otcs_frontend.get_workspace_by_type_and_name( + type_id=workspace_type_id, name=workspace_name + ) + workspace_id = self._otcs_frontend.get_result_value(response, "id") + + # Check if the workspace does exists with an old name (so it couldn't be found by the name lookup) + # For this we expect a key value to be defined for the + # bulk workspace and one of the category / attribute item + # to be marked with 'is_key' = True. 
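+            # Illustrative (hypothetical) payload fragment for such a key-based
+            # lookup - "key" resolves to a unique value per data row and "is_key"
+            # marks the category attribute that stores this value:
+            #
+            #   "key": "{CustomerID}",
+            #   "categories": [
+            #       {"name": "Customer", "attribute": "Customer ID",
+            #        "value_field": "{CustomerID}", "is_key": True},
+            #   ]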
+ if not workspace_id and key: + key_attribute = next( + ( + cat_attr + for cat_attr in categories + if cat_attr.get("is_key", False) is True + ), + None, ) - if not response: + if key_attribute: + cat_name = key_attribute.get("name", None) + att_name = key_attribute.get("attribute", None) + # determine where workspaces of this type typically reside: + parent_id = self._otcs_frontend.get_workspace_type_location( + type_id=workspace_type_id + ) + + logger.info( + "Try to find existing workspace with the key value -> '%s' in category -> '%s' and attribute -> '%s' in folder with ID -> %s...", + key, + cat_name, + att_name, + parent_id, + ) + # Try to find the node that has the given attribute value: + response = self._otcs_frontend.lookup_node( + parent_node_id=parent_id, + category=cat_name, + attribute=att_name, + value=key, + ) + workspace_id = self._otcs_frontend.get_result_value(response, "id") + else: logger.error( - "Failed to update public permissions for item -> %s (%s).", - str(node_id), - node_name, + "Bulk Workspace has a key -> '%s' defined but none of the category attributes is marked as a key attribute ('is_key' is missing)!", + key, ) - success = False - continue - # 4. Process Assigned User Permissions (if specified and not empty) - if "users" in permission and permission["users"]: - users = permission["users"] - for user in users: - if not "name" in user or not "permissions" in user: - logger.error( - "Missing user name or permissions in user permission specificiation. Cannot set user permissions. Skipping..." - ) - success = False - continue - user_name = user["name"] - user_permissions = user["permissions"] - response = self._otcs.get_user(user_name, True) - user_id = self._otcs.get_result_value(response=response, key="id") - if not user_id: - logger.error( - "Cannot find user with name -> %s; cannot set user permissions. Skipping user...", - user_name, - ) - success = False - continue - user["id"] = user_id # write ID back into payload + # We try to get the external modify date of the existing workspace. + # The REST API may not return these field if it was never set before. + # So we set show_error = False for this call to avoid error messages. + workspace_external_modify_date = self._otcs_frontend.get_result_value( + response, "external_modify_date", show_error=False + ) - logger.info( - "Update permission of user -> %s for item -> %s (%s) to -> %s", - user_name, - node_name, - str(node_id), - str(user_permissions), + # Workspace does not exists - we create a new workspace: + if not workspace_id: + # If category data is in payload we substitute + # the values with data from the current data row: + if categories: + # Make sure the threads are not changing data structures that + # are shared between threads. categories is a list of dicts. + # list and dicts are "mutable" data structures in Python! 
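+                    # Example (illustrative): a category item like
+                    #   {"name": "Customer", "attribute": "City", "value_field": "{City}"}
+                    # comes back from process_bulk_categories() with its "value"
+                    # set to the resolved data of the current row, e.g. "Berlin".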
+ worker_categories = self.process_bulk_categories( + row=row, + index=index, + categories=categories, + replacements=replacements, ) - response = self._otcs.assign_permission( - int(node_id), "custom", user_id, user_permissions, apply_to + workspace_category_data = self.prepare_workspace_create_form( + categories=worker_categories, + template_id=template_id, ) - if not response: + if not workspace_category_data: logger.error( - "Failed to update assigned user permissions for item -> %s.", - node_id, + "Failed to prepare the category data for new workspace -> '%s'!", + workspace_name, ) - success = False + result["success"] = False + result["failure_counter"] += 1 + continue # for index, row in partition.iterrows() + else: + workspace_category_data = {} - # 5. Process Assigned Group Permissions (if specified and not empty) - if "groups" in permission and permission["groups"]: - groups = permission["groups"] - for group in groups: - if not "name" in group or not "permissions" in group: - logger.error( - "Missing group name or permissions in group permission specificiation. Cannot set group permissions. Skipping..." - ) - continue - group_name = group["name"] - group_permissions = group["permissions"] + logger.info( + "Bulk create workspace -> '%s'...", + workspace_name, + ) + + # Create the workspace with all provided information: + response = self._otcs_frontend.create_workspace( + workspace_template_id=template_id, + workspace_name=workspace_name, + workspace_description=description, + workspace_type=workspace_type_id, + category_data=workspace_category_data, + external_create_date=external_create_date, + external_modify_date=external_modify_date, + show_error=False, + ) + if response is None: + # Potential race condition: see if the workspace has been created by a concurrent thread. + # So we better check if the workspace is there even if the create_workspace() call delivered + # a 'None' response: + response = self._otcs_frontend.get_workspace_by_type_and_name( + type_id=workspace_type_id, name=workspace_name + ) + workspace_id = self._otcs_frontend.get_result_value(response, "id") + if not workspace_id: + logger.error( + "Failed to bulk create workspace -> '%s' with type ID -> %s!", + workspace_name, + workspace_type_id, + ) + result["success"] = False + result["failure_counter"] += 1 + continue + else: logger.info( - "Update permissions of group -> %s for item -> %s (%s) to -> %s", - group_name, - node_name, - str(node_id), - str(group_permissions), + "Successfully created bulk workspace -> '%s' with ID -> %s", + workspace_name, + workspace_id, ) - otcs_group = self._otcs.get_group(group_name, True) - group_id = self._otcs.get_result_value(otcs_group, "id") - if not group_id: - logger.error( - "Cannot find group with name -> %s; cannot set group permissions. Skipping group...", - group_name, - ) - success = False - continue - group["id"] = group_id # write ID back into payload - response = self._otcs.assign_permission( - int(node_id), "custom", group_id, group_permissions, apply_to + if self._aviator_enabled: + if ( + "enable_aviator" in bulk_workspace + and bulk_workspace["enable_aviator"] is True + ): + response = self._otcs_frontend.update_workspace_aviator( + workspace_id, True + ) + if not response: + logger.error( + "Failed to enable Content Aviator for workspace -> '%s' (%s)", + workspace_name, + workspace_id, + ) + # end if not workspace_id + + # If updates are enforced we update the existing workspace with + # fresh metadata from the payload. 
Additionally we check the external + # modify date to support incremental load for content that has really + # changed. + elif enforce_updates and OTCS.date_is_newer( + date_old=workspace_external_modify_date, date_new=external_modify_date + ): + # If category data is in payload we substitute + # the values with data from the current data row: + if categories: + # Make sure the threads are not changing data structures that + # are shared between threads. categories is a list of dicts. + # list and dicts are "mutable" data structures in Python! + worker_categories = self.process_bulk_categories( + row=row, + index=index, + categories=categories, + replacements=replacements, ) - if not response: + # Transform the payload structure into the format + # the OTCS REST API requires: + workspace_category_data = self.prepare_category_data( + categories_payload=worker_categories, + source_node_id=workspace_id, + ) + if not workspace_category_data: logger.error( - "Failed to update assigned group permissions for item -> %s (%s).", - node_name, - str(node_id), + "Failed to prepare the updated category data for workspace -> '%s'!", + workspace_name, ) - success = False - - self.write_status_file(success, section_name, permissions) - - return success - - # end method definition - - def process_assignments(self, section_name: str = "assignments") -> bool: - """Process assignments specified in payload and assign items (such as workspaces and - items with nicknames) to users or groups. - - Args: - section_name (str, optional): name of the section. It can be overridden - for cases where multiple sections of same type - are used (e.g. the "Post" sections) - This name is also used for the "success" status - files written to the Admin Personal Workspace - Returns: - bool: True if payload has been processed without errors, False otherwise - """ - - if not self._assignments: - logger.info("Payload section -> %s is empty. Skipping...", section_name) - return True - - # If this payload section has been processed successfully before we - # can return True and skip processing it once more: - if self.check_status_file(section_name): - return True - - success: bool = True - - for assignment in self._assignments: - # Sanity check: we need a subject - it's mandatory: - if not "subject" in assignment: - logger.error("Assignment needs a subject! Skipping assignment...") - success = False - continue - subject = assignment["subject"] + result["success"] = False + result["failure_counter"] += 1 + continue # for index, row in partition.iterrows() + else: + workspace_category_data = {} - # Check if element has been disabled in payload (enabled = false). - # In this case we skip the element: - if "enabled" in assignment and not assignment["enabled"]: logger.info( - "Payload for Assignment -> %s is disabled. Skipping...", subject - ) - continue - - # instruction is optional but we give a warning if they are missing: - if not "instruction" in assignment: - logger.warning("Assignment -> %s should have an instruction!", subject) - instruction = "" - else: - instruction = assignment["instruction"] - # Sanity check: we either need users or groups (or both): - if not "groups" in assignment and not "users" in assignment: - logger.error( - "Assignment -> %s needs groups or users! 
Skipping assignment...", - subject, + "Bulk update existing workspace -> '%s'...", + workspace_name, ) - success = False - continue - # Check if a workspace is specified for the assignment and check it does exist: - if "workspace" in assignment and assignment["workspace"]: - workspace = next( - ( - item - for item in self._workspaces - if item["id"] == assignment["workspace"] - ), - None, + # Update the workspace with all provided information: + response = self._otcs_frontend.update_workspace( + workspace_id=workspace_id, + workspace_name=workspace_name, + workspace_description=description, + category_data=workspace_category_data, + external_create_date=external_create_date, + external_modify_date=external_modify_date, + show_error=True, ) - if not workspace: - logger.error( - "Assignment -> %s has specified a not existing workspace -> %s! Skipping assignment...", - subject, - assignment["workspace"], - ) - success = False - continue - node_id = self.determine_workspace_id(workspace) - if not node_id: + if not response: logger.error( - "Assignment -> %s has specified a not existing workspace -> %s! Skipping assignment...", - subject, - assignment["workspace"], + "Failed to bulk update existing workspace -> '%s' with type ID -> %s!", + workspace_name, + workspace_type_id, ) - success = False + result["success"] = False + result["failure_counter"] += 1 continue - # If we don't have a workspace then check if a nickname is specified for the assignment: - elif "nickname" in assignment: - response = self._otcs.get_node_from_nickname(assignment["nickname"]) - node_id = self._otcs.get_result_value(response, "id") - if not node_id: - # if response == None: + + # nickname has been calculated for existence test above + # we now assign it to the new workspace + if nickname: + response = self._otcs_frontend.set_node_nickname( + node_id=workspace_id, nickname=nickname, show_error=True + ) + if not response: logger.error( - "Assignment item with nickname -> %s not found", - assignment["nickname"], + "Failed to assign nickname -> '%s' to workspace -> '%s'", + nickname, + workspace_name, ) - success = False - continue - else: - logger.error( - "Assignment -> %s needs a workspace or nickname! Skipping assignment...", - subject, - ) - success = False - continue + result["success_counter"] += 1 + # Record the workspace name and ID to allow to read it from failure file + # and speedup the process. + result["workspaces"][workspace_name] = workspace_id - assignees = [] + logger.info("End working...") - if "groups" in assignment: - group_assignees = assignment["groups"] - for group_assignee in group_assignees: - # find the group in the group list - group = next( - ( - item - for item in self._groups - if item["name"] == group_assignee - ), - None, - ) - if not group: - logger.error( - "Assignment group -> %s does not exist! Skipping group...", - group_assignee, - ) - success = False - continue - if not "id" in group: - logger.error( - "Assignment group -> %s does not have an ID. Skipping group...", - group_assignee, - ) - success = False - continue - group_id = group["id"] - # add the group ID to the assignee list: - assignees.append(group_id) + results.append(result) - if "users" in assignment: - user_assignees = assignment["users"] - for user_assignee in user_assignees: - # find the user in the user list - user = next( - (item for item in self._users if item["name"] == user_assignee), - None, - ) - if not user: - logger.error( - "Assignment user -> %s does not exist! 
Skipping user...", - user_assignee, - ) - success = False - continue - if not "id" in user: - logger.error( - "Assignment user -> %s does not have an ID. Skipping user...", - user_assignee, - ) - success = False - continue - user_id = user["id"] - # add the group ID to the assignee list: - assignees.append(user_id) + return - if not assignees: - logger.error( - "Cannot add assignment -> %s for node ID -> %s because no assignee was found.", - subject, - node_id, - ) - success = False - continue + # end method definition - response = self._otcs.assign_item_to_user_group( - int(node_id), subject, instruction, assignees + def lookup_data_source_value( + self, data_source: dict, lookup_column: str, lookup_value: str + ) -> pd.Series | None: + """Lookup a value in a given data source (specified by payload dict). + If the data source has not been loaded before then load the data source. + + Args: + data_source (dict): Payload dictionary of the data source definition. + lookup_column (str): Name of the column in the data frame (see Data class) + lookup_value (str): Value to lookup - selection criteria for the result row. + + Returns: + pd.Series | None: Row that matches the lookup value in the lookup column. + """ + + data_source_name = data_source.get("name", None) + if not data_source_name: + logger.error("Data source has no name!") + return None + + # First we check if the data source has been loaded already. + # If not, we load the data source on the fly: + data_source_data: Data = data_source.get("data", None) + if not data_source_data: + logger.warning( + "Data source -> '%s' has no data. Trying to reload...", + data_source_name, ) - if not response: - logger.error( - "Failed to add assignment -> %s for node ID -> %s with assignees -> %s.", - subject, - node_id, - assignees, + # We don't want multiple threads to trigger a datasource load at the same time, + # so we use a lock (mutex) to avoid this: + data_load_lock.acquire() + try: + data_source_data = self.process_bulk_datasource( + data_source_name=data_source_name, + force_reload=True, ) - success = False + finally: + # Ensure the lock is released even if an error occurs + data_load_lock.release() - self.write_status_file(success, section_name, self._assignments) + # iIf we still don't have data from this data source we bail out: + if not data_source_data: + logger.error( + "Data source -> '%s' has no data and reload did not work. Cannot lookup value -> '%s' in column -> '%s'!", + data_source_name, + lookup_value, + lookup_column, + ) + return None - return success + # Lookup the data frame row (pd.Series) in the given + # column with the given lookup value: + lookup_row: pd.Series = data_source_data.lookup_value( + lookup_column=lookup_column, + lookup_value=lookup_value, + ) - # end method definition + return lookup_row - def process_user_licenses( + # end method definition + + def process_bulk_workspaces_synonym_lookup( self, - resource_name: str, - license_feature: str, - license_name: str, - user_specific_payload_field: str = "licenses", - section_name: str = "userLicenses", - ) -> bool: - """Assign a specific OTDS license feature to all Extended ECM users. - This method is used for OTIV and Extended ECM licenses. + data_source_name: str, + workspace_name_synonym: str = "", + workspace_type: str = "", + ) -> tuple[int | None, str | None] | None: + """Use a datasource to lookup the workspace name (or all fields) and ID using a given synonym. 
        Args:
-            resource_name (str): name of the OTDS resource
-            license_feature (str): license feature to assign to the user (product specific)
-            license_name (str): Name of the license Key (e.g. "EXTENDED_ECM" or "INTELLIGENT_VIEWING")
-            user_specific_payload_field (str, optional): name of the user specific field in payload
-                                                         (if empty it will be ignored)
-            section_name (str, optional): name of the section. It can be overridden
-                                          for cases where multiple sections of same type
-                                          are used (e.g. the "Post" sections)
-                                          This name is also used for the "success" status
-                                          files written to the Admin Personal Workspace
+            data_source_name (str): Data source name.
+            workspace_name_synonym (str): The synonym of the workspace name as input for the lookup.
+            workspace_type (str): Name of the workspace type
+
         Returns:
-            bool: True if payload has been processed without errors, False otherwise
+            tuple[int | None, str | None]: returns the workspace ID and the looked up workspace name
        """

-        if not self._users:
-            logger.info("Payload section -> %s is empty. Skipping...", section_name)
-            return True
-
-        # If this payload section has been processed successfully before we
-        # can return True and skip processing it once more:
-        if self.check_status_file(section_name):
-            return True
+        # Determine the data source to use for synonym lookup:
+        if not data_source_name:
+            logger.error(
+                "No workspace data source name specified. Cannot lookup the workspace by synonym -> '%s'!",
+                workspace_name_synonym,
+            )
+            return (None, None)
+        workspace_data_source = next(
+            (
+                item
+                for item in self._bulk_datasources
+                if item["name"] == data_source_name
+            ),
+            None,
+        )
+        if not workspace_data_source:
+            logger.error(
+                "Workspace data source -> '%s' not found in payload. Cannot lookup the workspace by synonym -> '%s'!",
+                data_source_name,
+                workspace_name_synonym,
+            )
+            return (None, None)

-        success: bool = True
+        # Read the synonym column and the name column from the data source payload item:
+        workspace_data_source_name_column = workspace_data_source.get(
+            "name_column", None  # e.g. "Name"
+        )
+        workspace_data_source_synonyms_column = workspace_data_source.get(
+            "synonyms_column", None  # e.g. "Synonyms"
+        )

-        otds_resource = self._otds.get_resource(resource_name)
-        if not otds_resource:
-            logger.error(
-                "OTDS Resource -> {} not found. Cannot assign licenses to users."
+        if (
+            not workspace_data_source_name_column
+            or not workspace_data_source_synonyms_column
+        ):
+            logger.warning(
+                "Workspace data source -> '%s' has no synonym lookup columns. Cannot find the workspace by synonym -> '%s'!",
+                data_source_name,
+                workspace_name_synonym,
            )
-            return False
+            return (None, None)

-        user_partition = self._otcs.config()["partition"]
-        if not user_partition:
-            logger.error("OTCS user partition not found in OTDS!")
-            return False
+        # Get the row that has the synonym in the synonym column:
+        lookup_row = self.lookup_data_source_value(
+            data_source=workspace_data_source,
+            lookup_column=workspace_data_source_synonyms_column,
+            lookup_value=workspace_name_synonym,
+        )

-        for user in self._users:
-            user_name = user["name"]
+        if lookup_row is not None:
+            # Now we determine the real workspace name by taking it from
+            # the name column in the result row:
+            workspace_name = lookup_row[workspace_data_source_name_column]
+            logger.info(
+                "Found workspace name -> '%s' using synonym -> '%s'.",
+                workspace_name,
+                workspace_name_synonym,
+            )

-            # Check if element has been disabled in payload (enabled = false).
- # In this case we skip the element: - if "enabled" in user and not user["enabled"]: - logger.info( - "Payload for User -> %s is disabled. Skipping...", user_name + # We now have the real name. If the workspace type name is + # provided as well we should be able to lookup the workspace ID now: + if workspace_type: + response = self._otcs_frontend.get_workspace_by_type_and_name( + type_name=workspace_type, + name=workspace_name, ) - continue + workspace_id = self._otcs_frontend.get_result_value(response, "id") + else: + # This method may be called with workspace_type=None. + # In this case we can return the synonym but cannot + # lookup the workspace ID: + workspace_id = None - if user_specific_payload_field and user_specific_payload_field in user: - logger.info( - "Found specific license feature -> %s for User -> %s. Overwriting default license feature -> %s", - user[user_specific_payload_field], - user_name, - license_feature, - ) - user_license_feature = user[user_specific_payload_field] - else: # use the default feature from the actual parameter - user_license_feature = [license_feature] + # Return the tuple with workspace_id and the real workspace name + return (workspace_id, workspace_name) - for license_feature in user_license_feature: - if self._otds.is_user_licensed( - user_name=user_name, - resource_id=otds_resource["resourceID"], - license_feature=license_feature, - license_name=license_name, - ): - logger.info( - "User -> %s is already licensed for -> %s (%s)", - user_name, - license_name, - license_feature, - ) - continue - assigned_license = self._otds.assign_user_to_license( - user_partition, - user_name, # we want the plain login name here - otds_resource["resourceID"], - license_feature, - license_name, - ) + return (None, None) - if not assigned_license: - logger.error( - "Failed to assign license feature -> %s to user -> %s!", - license_feature, - user_name, + # end method definition + + def process_bulk_workspaces_lookup( + self, + workspace_nickname: str | None = None, + workspace_name: str | None = None, + workspace_type: str | None = None, + data_source_name: str | None = None, + ) -> tuple[int | None, str | None]: + """Use a combination of workspace name, workspace type, and workspace datasource (using synonyms) + to lookup the workspace name and ID + + Args: + workspace_nickname (str): the nickname of the workspace + workspace_name (str): The name as input for lookup. This must be one of the synonyms. + workspace_type (str): Name of the workspace type + data_source_name (str): Workspace data source name + + Returns: + tuple[int | None, str | None]: returns the workspace ID and the looked up workspace name + """ + + # First we try to find the workspace by a nickname (if provided) + if workspace_nickname: + # Nicknames for sure are not allowed to include spaces and dashes: + workspace_nickname = workspace_nickname.replace(" ", "_") + workspace_nickname = workspace_nickname.replace("-", "_") + workspace_nickname = workspace_nickname.lower() + + response = self._otcs_frontend.get_node_from_nickname( + nickname=workspace_nickname, show_error=False + ) + workspace_id = self._otcs_frontend.get_result_value(response, "id") + if workspace_id: + # If we don't have the name yet get it via OTCS as we have the ID now: + if not workspace_name: + workspace_name = self._otcs_frontend.get_result_value( + response, "name" ) - success = False + return (workspace_id, workspace_name) + # DON'T RETURN FAILURE AT THIS POINT! 
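+        # Lookup strategy of this method, in order: 1. nickname, 2. workspace
+        # type + name, 3. synonym lookup via the workspace data source. The
+        # remaining two steps follow below.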
-        self.write_status_file(success, section_name, self._users)
+        # Our 2nd try is to find the workspace by a workspace name and workspace type combination:
+        if workspace_name:
+            workspace_name = workspace_name.strip()
+        else:
+            logger.error(
+                "No workspace name specified. Cannot find the workspace by type and name or synonym.",
+            )
+            return (None, None)
+
+        if workspace_name and workspace_type:
+            response = self._otcs_frontend.get_workspace_by_type_and_name(
+                type_name=workspace_type, name=workspace_name
+            )
+            workspace_id = self._otcs_frontend.get_result_value(response, "id")
+            if workspace_id:
+                return (workspace_id, workspace_name)
+
+        # If the code gets to here we don't have a nickname and the workspace with the given name
+        # and type was not found either. Now we see if we can find the workspace name
+        # as a synonym in the workspace data source to find the real/correct name:
+        if data_source_name:
+            logger.info(
+                "Try to find the workspace with the synonym -> '%s' using data source -> '%s'...",
+                workspace_name,
+                data_source_name,
+            )
+
+            (workspace_id, workspace_synonym_name) = (
+                self.process_bulk_workspaces_synonym_lookup(
+                    data_source_name=data_source_name,
+                    workspace_name_synonym=workspace_name,  # see if workspace_name is a synonym
+                    workspace_type=workspace_type,
+                )
+            )
+            if workspace_id is not None:
+                return (workspace_id, workspace_synonym_name)
+
+        # As this message may appear hundreds of times in the log
+        # we invest some effort to make it look nice:
+        message = "Couldn't find a workspace "
+        concat_string = ""
+        if workspace_nickname:
+            message += "by nickname -> '{}'".format(workspace_nickname)
+            concat_string = ", nor "
+        if workspace_name:
+            message += "{}by name -> '{}'".format(concat_string, workspace_name)
+            concat_string = ", nor "
+        if data_source_name:
+            message += "{}as synonym in data source -> {}".format(
+                concat_string, data_source_name
+            )
+        logger.error(message)

-        return success
+        return (
+            None,
+            workspace_name,
+        )  # it is important to return the name - used by process_bulk_categories()

-    # end method definition
+    # end method definition

-    def process_exec_pod_commands(self, section_name: str = "execPodCommands") -> bool:
-        """Process commands that should be executed in the Kubernetes pods.
+    def process_bulk_workspace_relationships(
+        self, section_name: str = "bulkWorkspaceRelationships"
+    ) -> bool:
+        """Process workspace relationships in payload and bulk create them in Extended ECM (multi-threaded).

        Args:
            section_name (str, optional): name of the section. It can be overridden
                                          for cases where multiple sections of same type
-                                          are used (e.g. the "Post" sections)
+                                          are used (e.g. the "Post" sections like "workspacesPost")
                                          This name is also used for the "success" status
-                                          files written to the Admin Personal Workspace
+                                          files written to the Admin Personal Workspace.
        Returns:
            bool: True if payload has been processed without errors, False otherwise
        """

-        if not isinstance(self._k8s, K8s):
-            logger.error("K8s not setup properly -> Skipping %s...", section_name)
-            return False
-
-        if not self._exec_pod_commands:
-            logger.info("Payload section -> %s is empty. Skipping...", section_name)
+        if not self._bulk_workspace_relationships:
+            logger.info("Payload section -> '%s' is empty. Skipping...", section_name)
            return True

+        if not pandas_installed:
+            logger.info("Python module 'Pandas' not installed. Skipping...")
+            return False
+
+        # If this payload section has been processed successfully before we
+        # can return True and skip processing it once more:
-        if self.check_status_file(section_name):
+        if self.check_status_file(payload_section_name=section_name):
            return True

+        # For efficient idempotent operation we may want to see which relationships
+        # have already been processed before:
+        if self.check_status_file(
+            payload_section_name=section_name, payload_specific=True, prefix="failure_"
+        ):
+            # Read payload from where we left it last time
+            self._bulk_workspace_relationships = self.get_status_file(
+                payload_section_name=section_name, prefix="failure_"
+            )
+            if not self._bulk_workspace_relationships:
+                logger.error(
+                    "Cannot load existing bulkWorkspaceRelationships failure file. Bailing out!"
+                )
+                return False
+
        success: bool = True

-        for exec_pod_command in self._exec_pod_commands:
-            if not "pod_name" in exec_pod_command:
+        for bulk_workspace_relationship in self._bulk_workspace_relationships:
+            # Check if element has been disabled in payload (enabled = false).
+            # In this case we skip the element:
+            if (
+                "enabled" in bulk_workspace_relationship
+                and not bulk_workspace_relationship["enabled"]
+            ):
+                logger.info(
+                    "Payload for Bulk Workspace Relationship is disabled. Skipping..."
+                )
+                continue
+
+            # Read Pattern for "From" Workspace from payload:
+            if not "from_workspace" in bulk_workspace_relationship:
                logger.error(
-                    "To execute a command in a pod the pod name needs to be specified in the payload! Skipping to next pod command..."
+                    "Bulk Workspace Relationship creation needs a workspace nickname in from_workspace! Skipping to next bulk workspace relationship..."
                )
                success = False
                continue
-            pod_name = exec_pod_command["pod_name"]
+            from_workspace = bulk_workspace_relationship["from_workspace"]

-            if not "command" in exec_pod_command or not exec_pod_command.get("command"):
+            # Read Pattern for "To" Workspace from payload:
+            if not "to_workspace" in bulk_workspace_relationship:
                logger.error(
-                    "Pod command is not specified for pod -> %s! It needs to be a non-empty list! Skipping to next pod command...",
-                    pod_name,
+                    "Bulk Workspace Relationship creation needs a workspace nickname in to_workspace! Skipping to next bulk workspace relationship..."
                )
                success = False
                continue
-            command = exec_pod_command["command"]
+            to_workspace = bulk_workspace_relationship["to_workspace"]

-            # Check if element has been disabled in payload (enabled = false).
-            # In this case we skip the element:
-            if "enabled" in exec_pod_command and not exec_pod_command["enabled"]:
+            # The payload element must have a "data_source" key:
+            if not "data_source" in bulk_workspace_relationship:
+                logger.error("No data source specified in Bulk Workspace Relationship!")
+                success = False
+                continue
+            data_source_name = bulk_workspace_relationship["data_source"]
+
+            self._log_header_callback(
+                text="Process Bulk Workspace Relationships from -> '{}' to -> '{}'".format(
+                    from_workspace, to_workspace
+                ),
+                char="-",
+            )
+
+            copy_data_source = bulk_workspace_relationship.get(
+                "copy_data_source", False
+            )
+            force_reload = bulk_workspace_relationship.get("force_reload", True)
+
+            if copy_data_source:
                logger.info(
-                    "Payload for Exec Pod Command in pod -> %s is disabled. Skipping...",
-                    pod_name,
+                    "Take a copy of data source -> %s to avoid side effects for repetitive usage of the data source...",
+                    bulk_workspace_relationship["data_source"],
+                )
+                data = Data(
+                    self.process_bulk_datasource(
+                        data_source_name=data_source_name,
+                        force_reload=force_reload,
+                    )
+                )
+            else:
+                logger.info(
+                    "Use original data source -> %s and do not take a copy.",
+                    bulk_workspace_relationship["data_source"],
+                )
+                # Load and prepare the data source for the bulk processing:
+                data = self.process_bulk_datasource(
+                    data_source_name=data_source_name, force_reload=force_reload
+                )
+            if not data:
+                logger.error(
+                    "Failed to load data source for bulk workspace relationships from -> '%s' to -> '%s'",
+                    from_workspace,
+                    to_workspace,
                )
                continue

-            if not "description" in exec_pod_command:
-                logger.info("Executing command -> %s in pod -> %s", command, pod_name)
+            # Check if fields with list substructures should be exploded.
+            # We may want to do this outside the bulkDatasource to only
+            # explode for bulkDocuments and not for bulkWorkspaces or
+            # bulkWorkspaceRelationships:
+            explosions = bulk_workspace_relationship.get("explosions", [])
+            for explosion in explosions:
+                # explode field can be a string or a list
+                # exploding multiple fields at once avoids
+                # combinatorial explosions - this is VERY
+                # different from exploding columns one after the other!
+                if not "explode_field" in explosion:
+                    logger.error("Missing explosion field(s)!")
+                    continue
+                explode_field = explosion["explode_field"]
+                flatten_fields = explosion.get("flatten_fields", [])
+                split_string_to_list = explosion.get("split_string_to_list", False)
+                logger.info(
+                    "Starting explosion of bulk relationships by field(s) -> %s (type -> %s). Size of data set before explosion -> %s",
+                    explode_field,
+                    type(explode_field),
+                    str(len(data)),
+                )
+                data.explode_and_flatten(
+                    explode_field=explode_field,
+                    flatten_fields=flatten_fields,
+                    make_unique=False,
+                    split_string_to_list=split_string_to_list,
+                    reset_index=True,
+                )
+                logger.info("Size of data set after explosion -> %s", str(len(data)))

-            else:
-                description = exec_pod_command["description"]
+            # Check if duplicate lines for given fields should be removed:
+            if (
+                "unique" in bulk_workspace_relationship
+                and bulk_workspace_relationship["unique"]
+            ):
+                unique_fields = bulk_workspace_relationship["unique"]
                logger.info(
-                    "Executing command -> %s in pod -> %s (%s)",
-                    command,
-                    pod_name,
-                    description,
+                    "Starting deduplication of data set for bulk workspace relationships with unique fields -> %s. Size of data set before deduplication -> %s",
+                    str(unique_fields),
+                    str(len(data)),
+                )
+                data.deduplicate(unique_fields=unique_fields, inplace=True)
+                logger.info(
+                    "Size of data set after deduplication -> %s", str(len(data))
                )

+            # Sort the data set if "sort" is specified in payload. We may want to do this to have a
+            # higher chance that rows with common values that may collapse into
+            # one name are put into the same partition. This can avoid race conditions
+            # between different Python threads.
            if (
+                "sort" in bulk_workspace_relationship
+                and bulk_workspace_relationship["sort"]
            ):
-                result = self._k8s.exec_pod_command(pod_name, command)
-            else:
-                if not "timeout" in exec_pod_command:
-                    result = self._k8s.exec_pod_command_interactive(pod_name, command)
+                sort_fields = bulk_workspace_relationship["sort"]
+                logger.info(
+                    "Start sorting of bulk workspace relationships data set based on fields (columns) -> %s...",
+                    str(sort_fields),
+                )
+                data.sort(sort_fields=sort_fields, inplace=True)
+                logger.info(
+                    "Sorting of bulk workspace relationships data set based on fields (columns) -> %s completed!",
+                    str(sort_fields),
+                )
+
+            logger.info(
+                "Bulk create Workspace Relationships (from workspace -> '%s' to workspace -> '%s')",
+                from_workspace,
+                to_workspace,
+            )
+
+            bulk_thread_number = int(
+                bulk_workspace_relationship.get("thread_number", BULK_THREAD_NUMBER)
+            )
+
+            partitions = data.partitionate(bulk_thread_number)
+
+            # Create a list to hold the threads
+            threads = []
+            results = []
+
+            # Create and start a thread for each partition
+            for index, partition in enumerate(partitions, start=1):
+                thread = threading.Thread(
+                    name=f"{section_name}_{index:02}",
+                    target=self.thread_wrapper,
+                    args=(
+                        self.process_bulk_workspace_relationships_worker,
+                        bulk_workspace_relationship,
+                        partition,
+                        from_workspace,
+                        to_workspace,
+                        results,
+                    ),
+                )
+                # start a thread executing the process_bulk_workspace_relationships_worker() method below:
+                logger.info("Starting Thread -> %s...", str(thread.name))
+                thread.start()
+                threads.append(thread)
+
+            # Wait for all threads to complete
+            for thread in threads:
+                logger.info("Waiting for Thread -> %s to complete...", str(thread.name))
+                thread.join()
+                logger.info("Thread -> %s has completed.", str(thread.name))
+
+            if not "relationships" in bulk_workspace_relationship:
+                bulk_workspace_relationship["relationships"] = {}
+            for result in results:
+                if not result["success"]:
+                    logger.info(
+                        "Thread -> %s completed with %s failed, %s skipped, and %s created workspace relationships.",
+                        str(result["thread_id"]),
+                        str(result["failure_counter"]),
+                        str(result["skipped_counter"]),
+                        str(result["success_counter"]),
+                    )
+                    success = False
                else:
-                    timeout = exec_pod_command["timeout"]
-                    result = self._k8s.exec_pod_command_interactive(
-                        pod_name, command, timeout
+                    logger.info(
+                        "Thread -> %s completed successfully with %s skipped, and %s created workspace relationships.",
+                        str(result["thread_id"]),
+                        str(result["skipped_counter"]),
+                        str(result["success_counter"]),
                    )
+                # Record all generated workspace relationships. This should allow us
+                # to restart in case of failures and avoid trying to
+                # create relationships that have been created before.
+                bulk_workspace_relationship["relationships"].update(
+                    result["relationships"]
+                )

-            # we need to differentiate 3 cases here:
-            # 1. result = None is returned - this is an error (exception)
-            # 2. result is empty string - this is OK
-            # 3. result is a non-empty string - this is OK - print it to log
-            if result is None:
-                logger.error(
-                    "Execution of command -> %s in pod -> %s failed",
-                    command,
-                    pod_name,
+        self.write_status_file(
+            success, section_name, self._bulk_workspace_relationships
+        )
+
+        return success
+
+    # end method definition
+
+    def process_bulk_workspace_relationships_worker(
+        self,
+        bulk_workspace_relationship: dict,
+        partition: pd.DataFrame,
+        from_workspace: str,
+        to_workspace: str,
+        results: list | None = None,
+    ):
+        """This is the thread worker to create workspace relationships in bulk.
+        Each worker thread gets a partition of the rows that include
+        the data required for the workspace relationship creation.
+
+        Args:
+            bulk_workspace_relationship (dict): payload of the bulkWorkspaceRelationship
+            partition (pd.DataFrame): Data partition with rows to process
+            from_workspace (str): string pattern for nickname of workspace (from)
+            to_workspace (str): string pattern for nickname of workspace (to)
+            results (list): mutable list of thread results
+        """
+
+        thread_id = threading.get_ident()
+
+        logger.info(
+            "Start working on data set partition of size -> %s...",
+            str(len(partition)),
+        )
+
+        result = {}
+        result["thread_id"] = thread_id
+        result["success_counter"] = 0
+        result["failure_counter"] = 0
+        result["skipped_counter"] = 0
+        result["relationships"] = {}
+        result["success"] = True
+
+        # Check if workspace relationships have been processed before, i.e. testing
+        # if a "relationships" key exists and if it is pointing to a non-empty list:
+        if (
+            "relationships" in bulk_workspace_relationship
+            and bulk_workspace_relationship["relationships"]
+        ):
+            existing_workspace_relationships = bulk_workspace_relationship[
+                "relationships"
+            ]
+            logger.info(
+                "Found %s already processed workspace relationships. Try to complete the job...",
+                str(len(existing_workspace_relationships)),
+            )
+        else:
+            existing_workspace_relationships = {}
+
+        # Get the dictionary of replacements for bulk workspace relationships.
+        # It will be used in all places where data is read from the
+        # current data frame row. Each dictionary item has the field name as the
+        # dictionary key and a list of regular expressions as dictionary value.
+        replacements = bulk_workspace_relationship.get("replacements", None)
+
+        # Check if all data conditions to create the workspace relationship are met:
+        conditions = bulk_workspace_relationship.get("conditions", None)
+
+        # Type of the relationship - can either be child or parent.
+        relationship_type = bulk_workspace_relationship.get("type", "child")
+
+        # Nicknames are very limited in terms of allowed characters.
+        # For nicknames we need an additional regexp as we need to
+        # replace all non-alphanumeric, non-space characters with "".
+        # We also preserve hyphens in the first step to replace
+        # them below with underscores. This is important to avoid
+        # that different spellings of names produce different nicknames.
+        # We want spellings with spaces to match spellings with hyphens.
+        # For this the workspace names have a regexp "-| " in the payload.
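+        # Illustrative (hypothetical) payload fragment: "from_workspace" and
+        # "to_workspace" are string patterns that resolve to workspace nicknames
+        # per data row (the placeholder syntax shown here is just an example):
+        #
+        #   "from_workspace": "ws_customer_{CustomerID}",
+        #   "to_workspace": "ws_contract_{ContractID}",
+        #   "type": "child"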
+        nickname_additional_regex_list = [r"[^\w\s-]"]
+
+        # Process all datasets in the partition that was given to the thread:
+        for index, row in partition.iterrows():
+
+            # ensure clean variables by reset
+            from_workspace_id = None
+            to_workspace_id = None
+
+            logger.info(
+                "Processing data row -> %s for bulk workspace relationship creation...",
+                str(index),
+            )
+
+            # check if we have any exclusions that apply here:
+            if conditions:
+                evaluated_condition = self.evaluate_conditions(
+                    conditions=conditions, row=row, replacements=replacements
+                )
+                if not evaluated_condition:
+                    logger.info(
+                        "Condition for row -> %s not met. Skipping row for workspace relationship",
+                        str(index),
+                    )
+                    result["skipped_counter"] += 1
+                    continue
+
+            # Determine the workspace "from" nickname:
+            from_workspace_nickname = self.replace_bulk_placeholders(
+                input_string=from_workspace,
+                row=row,
+                replacements=replacements,
+                additional_regex_list=nickname_additional_regex_list,
+            )
+            if not from_workspace_nickname:
+                logger.warning(
+                    "Row -> %s does not have the required data to resolve -> %s for the workspace nickname (from)!",
+                    str(index),
+                    from_workspace,
+                )
+                result["skipped_counter"] += 1
+                continue
+            from_workspace_type = bulk_workspace_relationship.get(
+                "from_workspace_type", None
+            )
+            from_workspace_name = bulk_workspace_relationship.get(
+                "from_workspace_name", None
+            )
+            if from_workspace_name:
+                from_workspace_name = self.replace_bulk_placeholders(
+                    input_string=from_workspace_name,
+                    row=row,
+                    replacements=replacements,
+                )
+                if not from_workspace_name:
+                    logger.warning(
+                        "Row -> %s does not have the required data to resolve -> %s for the workspace name (from)!",
+                        str(index),
+                        from_workspace,
+                    )
+                    result["skipped_counter"] += 1
+                    continue
+            from_workspace_data_source = bulk_workspace_relationship.get(
+                "from_workspace_data_source", None
+            )
+
+            (from_workspace_id, from_workspace_name) = (
+                self.process_bulk_workspaces_lookup(
+                    workspace_nickname=from_workspace_nickname,
+                    workspace_name=from_workspace_name,
+                    workspace_type=from_workspace_type,
+                    data_source_name=from_workspace_data_source,
+                )
+            )
+
+            if not from_workspace_id:
+                logger.warning(
+                    "Cannot find Workspace to establish relationship (from)%s%s%s%s",
+                    (
+                        ", Nickname -> '{}'".format(from_workspace_nickname)
+                        if from_workspace_nickname
+                        else ""
+                    ),
+                    (
+                        ", Workspace Name -> '{}'".format(from_workspace_name)
+                        if from_workspace_name
+                        else ""
+                    ),
+                    (
+                        ", Workspace Type -> '{}'".format(from_workspace_type)
+                        if from_workspace_type
+                        else ""
+                    ),
+                    (
+                        ", Data Source -> '{}'".format(from_workspace_data_source)
+                        if from_workspace_data_source
+                        else ""
+                    ),
+                )
+                # Lower severity of this issue
+                # result["failure_counter"] += 1
+                # result["success"] = False
+                result["skipped_counter"] += 1
+                continue
+
+            # Determine the workspace "to" nickname:
+            to_workspace_nickname = self.replace_bulk_placeholders(
+                input_string=to_workspace,
+                row=row,
+                replacements=replacements,
+                additional_regex_list=nickname_additional_regex_list,
+            )
+            if not to_workspace_nickname:
+                logger.warning(
+                    "Row -> %s does not have the required data to resolve -> %s for the workspace nickname (to)!",
+                    str(index),
+                    to_workspace,
+                )
+                result["failure_counter"] += 1
+                continue
+            to_workspace_type = bulk_workspace_relationship.get(
+                "to_workspace_type", None
+            )
+            to_workspace_name = bulk_workspace_relationship.get(
+                "to_workspace_name", None
+            )
+            if to_workspace_name:
+                to_workspace_name = 
self.replace_bulk_placeholders(
+                    input_string=to_workspace_name,
+                    row=row,
+                    replacements=replacements,
+                )
+                if not to_workspace_name:
+                    logger.warning(
+                        "Row -> %s does not have the required data to resolve -> %s for the workspace name (to)!",
+                        str(index),
+                        to_workspace,
+                    )
+                    result["skipped_counter"] += 1
+                    continue
+            to_workspace_data_source = bulk_workspace_relationship.get(
+                "to_workspace_data_source", None
+            )
+
+            (to_workspace_id, to_workspace_name) = self.process_bulk_workspaces_lookup(
+                workspace_nickname=to_workspace_nickname,
+                workspace_name=to_workspace_name,
+                workspace_type=to_workspace_type,
+                data_source_name=to_workspace_data_source,
+            )
+
+            if not to_workspace_id:
+                logger.warning(
+                    "Cannot find Workspace to establish relationship (to)%s%s%s%s",
+                    (
+                        ", Nickname -> '{}'".format(to_workspace_nickname)
+                        if to_workspace_nickname
+                        else ""
+                    ),
+                    (
+                        ", Workspace Name -> '{}'".format(to_workspace_name)
+                        if to_workspace_name
+                        else ""
+                    ),
+                    (
+                        ", Workspace Type -> '{}'".format(to_workspace_type)
+                        if to_workspace_type
+                        else ""
+                    ),
+                    (
+                        ", Data Source -> '{}'".format(to_workspace_data_source)
+                        if to_workspace_data_source
+                        else ""
+                    ),
+                )
+                # Lower severity of this issue:
+                # result["failure_counter"] += 1
+                # result["success"] = False
+                result["skipped_counter"] += 1
+                continue
+
+            # Check if the workspace relationship has been created before (either in this run
+            # or in a former run of the customizer):
+            if (  # processed in former run?
+                from_workspace_id in existing_workspace_relationships
+                and to_workspace_id
+                in existing_workspace_relationships[from_workspace_id]
+            ) or (  # processed in current run?
+                from_workspace_id in result["relationships"]
+                and to_workspace_id in result["relationships"][from_workspace_id]
+            ):
                 logger.info(
-                    "Execution of command -> %s in pod -> %s returned result -> %s",
-                    command,
-                    pod_name,
-                    result,
+                    "Workspace relationship between workspace -> '%s' (%s) and related workspace -> '%s' (%s) has successfully been processed before. Skipping...",
+                    from_workspace_name,
+                    str(from_workspace_id),
+                    to_workspace_name,
+                    str(to_workspace_id),
+                )
+                result["skipped_counter"] += 1
+                continue
+
+            # Check if the workspace relationship does already exist in Extended ECM
+            # (this is an additional safety measure to avoid errors):
+            response = self._otcs_frontend.get_workspace_relationships(
+                workspace_id=from_workspace_id,
+                relationship_type=relationship_type,
+                related_workspace_name=to_workspace_name,
+            )
+            current_workspace_relationships = self._otcs.exist_result_item(
+                response, "id", to_workspace_id
+            )
+            if current_workspace_relationships:
+                logger.info(
+                    "Workspace relationship between workspace -> '%s' (%s) and related workspace -> '%s' (%s) does already exist. Skipping...",
+                    from_workspace_name,
+                    str(from_workspace_id),
+                    to_workspace_name,
+                    str(to_workspace_id),
                 )
+                result["skipped_counter"] += 1
+                continue
+
+            logger.info(
+                "Bulk create workspace relationship '%s' (%s) -> '%s' (%s)...",
+                from_workspace_name,
+                str(from_workspace_id),
+                to_workspace_name,
+                str(to_workspace_id),
+            )
+
+            response = self._otcs_frontend.create_workspace_relationship(
+                workspace_id=from_workspace_id,
+                related_workspace_id=to_workspace_id,
+                relationship_type=relationship_type,
+                show_error=False,
+            )
+
+            if response is None:
+                # Potential race condition: see if the workspace-2-workspace relationship has been created by a concurrent thread.
+                # So we better check if the relationship is there even if the create_workspace_relationship() call delivered
+                # a 'None' response:
+                response = self._otcs_frontend.get_workspace_relationships(
+                    workspace_id=from_workspace_id,
+                    relationship_type=relationship_type,
+                    related_workspace_name=to_workspace_name,
+                )
+                current_workspace_relationships = self._otcs.exist_result_item(
+                    response, "id", to_workspace_id
+                )
+                if current_workspace_relationships:
+                    logger.info(
+                        "Workspace relationship between workspace -> '%s' (%s) and related workspace -> '%s' (%s) has been created concurrently. Skipping...",
+                        from_workspace_name,
+                        str(from_workspace_id),
+                        to_workspace_name,
+                        str(to_workspace_id),
+                    )
+                    result["skipped_counter"] += 1
+                    continue
+                else:
+                    logger.error(
+                        "Failed to bulk create workspace relationship (%s) from -> '%s' (%s) to -> '%s' (%s)!",
+                        relationship_type,
+                        from_workspace_name,
+                        str(from_workspace_id),
+                        to_workspace_name,
+                        str(to_workspace_id),
+                    )
+                    result["success"] = False
+                    result["failure_counter"] += 1
             else:
-                # It is not an error if no result is returned. It depends on the nature of the command
-                # if a result is written to stdout or stderr.
                 logger.info(
-                    "Execution of command -> %s in pod -> %s did not return a result",
-                    command,
-                    pod_name,
+                    "Successfully created bulk workspace relationship (%s) from -> '%s' (%s) to -> '%s' (%s)",
+                    relationship_type,
+                    from_workspace_name,
+                    str(from_workspace_id),
+                    to_workspace_name,
+                    str(to_workspace_id),
+                )
+                result["success_counter"] += 1
+                # Record the workspace IDs to allow them to be read from the failure file
+                # and speed up reprocessing:
+                if from_workspace_id not in result["relationships"]:
+                    # Initialize the "to" list:
+                    result["relationships"][from_workspace_id] = [to_workspace_id]
+                else:
+                    result["relationships"][from_workspace_id].append(to_workspace_id)
+
+        logger.info("End working...")
+
+        results.append(result)
+
+        return
+
+    # end method definition
+
+    def prepare_category_data(
+        self, categories_payload: dict, source_node_id: int
+    ) -> dict | None:
+        """Prepare the metadata structure for a new workspace or document.
+
+        Args:
+            categories_payload (dict): Payload information for the workspace or document categories
+            source_node_id (int): Item to derive or inherit the category data from. We expect this to
+                                  be a folder, workspace or document that has the category assigned.
+
+        Returns:
+            dict | None: Category data structure required for subsequent document upload.
+        """
+
+        response = self._otcs.get_node_categories(node_id=source_node_id)
+        if not response or not response["results"]:
+            logger.warning(
+                "Document parent with ID -> %s does not inherit categories but we have category payload for the document!",
+                source_node_id,
+            )
+            return None
+
+        # get the list of inherited categories:
+        inherited_categories = response["results"]
+
+        # initialize the result dict we will return at the end:
+        category_data = {}
+
+        # we iterate over all parent categories that are inherited
+        # to the new document and try to find matching payload values...
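For orientation, the category data structure assembled below maps category IDs to attribute-ID/value pairs, with set rows encoded in the attribute ID. A sketch with invented IDs, not taken from a real system:

```python
# Hypothetical result of prepare_category_data() for one category (ID 921880):
category_data = {
    "921880": {
        "921880_2": "Sales",           # plain attribute
        "921880_3": 1004,              # "user" attribute: login name resolved to user ID
        "921880_5_2_6": "2024-01-31",  # set attribute: row 2 replaces the "_x_" placeholder
    }
}
```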
+ for inherited_category in inherited_categories: + # data = inherited_category["data"]["categories"] + + # We use the "metadata_order" which is a list of typically one + # element that includes the category ID: + metadata_order = inherited_category["metadata_order"] + + # if it is not a list or empty we continue: + if not metadata_order["categories"] or not isinstance( + metadata_order["categories"], list + ): + continue + inherited_category_id = metadata_order["categories"][0] + + # We use the "metadata" dict to determine the category name + # the keys of the dict are the category ID and attribute IDs + # the first element in the dict is always the category itself. + metadata = inherited_category["metadata"]["categories"] + category_name = metadata[str(inherited_category_id)]["name"] + + logger.debug( + "Source node ID -> %s has category -> '%s' (%s)", + source_node_id, + category_name, + inherited_category_id, + ) + + # The following method returns two values: the category ID and + # a dict of the attributes. If the category is not found + # on the parent node it returns -1 for the category ID + # and an empty dict for the attribute definitions: + ( + category_id, + attribute_definitions, + ) = self._otcs.get_node_category_definition(source_node_id, category_name) + if category_id == -1: + logger.error( + "The item with ID -> %s does not have the specified category -> %s assigned. Skipping...", + source_node_id, + category_name, ) + continue - self.write_status_file(success, section_name, self._exec_pod_commands) + category_data[str(category_id)] = {} - return success + logger.debug( + "Processing the attributes in payload to find values for the inherited category -> '%s' (%s)...", + category_name, + category_id, + ) + # now we fill the prepared (but empty) category_data + # with the actual attribute values from the payload: + for attribute in categories_payload: + attribute_name = attribute["attribute"] + set_name = attribute.get("set", "") + row = attribute.get("row", "") + if attribute["name"] != category_name: + logger.debug( + "Attribute -> '%s' does not belong to category -> '%s'. Skipping...", + attribute_name, + category_name, + ) + continue + attribute_value = attribute["value"] - # end method definition + # Set attributes are constructed with : + # by method get_node_category_definition(). This is not + # an OTCS REST syntax but specific for payload.py + if set_name: + attribute_name = set_name + ":" + attribute_name - def process_document_generators( - self, section_name: str = "documentGenerators" - ) -> bool: - """Generate documents for a defined workspace type based on template + if not attribute_name in attribute_definitions: + logger.error( + "Illegal attribute name -> '%s' in payload for category -> '%s'", + attribute_name, + category_name, + ) + continue + attribute_type = attribute_definitions[attribute_name]["type"] + attribute_id = attribute_definitions[attribute_name]["id"] + # For multi-line sets the "x" is the placeholder for the + # row number. We need to replace it with the actual row number + # given in the payload: + if "_x_" in attribute_id: + if not row: + logger.error( + "Row number is not specified in payload for attribute -> '%s' (%s)", + attribute_name, + attribute_id, + ) + continue + attribute_id = attribute_id.replace("_x_", "_" + str(row) + "_") + + # Special treatment for type user: determine the ID for the login name. 
+ # the ID is the actual value we have to put in the attribute: + if attribute_type == "user": + user = self._otcs.get_user(name=attribute_value, show_error=True) + user_id = self._otcs.get_result_value(response=user, key="id") + if not user_id: + logger.error( + "Cannot find user with login name -> '%s'. Skipping...", + attribute_value, + ) + continue + attribute_value = user_id + category_data[str(category_id)][attribute_id] = attribute_value + # end for categories + + logger.debug("Resulting category data -> %s", str(category_data)) + + return category_data + + # end method definition + + def process_bulk_documents(self, section_name: str = "bulkDocuments") -> bool: + """Process bulkDocuments in payload and bulk create them in Extended ECM (multi-threaded). Args: section_name (str, optional): name of the section. It can be overridden for cases where multiple sections of same type - are used (e.g. the "Post" sections) + are used (e.g. the "Post" sections like "workspacesPost") This name is also used for the "success" status files written to the Admin Personal Workspace Returns: bool: True if payload has been processed without errors, False otherwise """ - if not self._doc_generators: - logger.info("Payload section -> %s is empty. Skipping...", section_name) + if not self._bulk_documents: + logger.info("Payload section -> '%s' is empty. Skipping...", section_name) return True + if not pandas_installed: + logger.info("Python module 'Pandas' not installed. Skipping...") + return False + # If this payload section has been processed successfully before we # can return True and skip processing it once more: - if self.check_status_file(section_name): + if self.check_status_file(payload_section_name=section_name): return True - success: bool = True - - # save admin credentials for later switch back to admin user: - admin_credentials = self._otcs.credentials() - authenticated_user = "admin" - - for doc_generator in self._doc_generators: - if not "workspace_type" in doc_generator: + # For efficient idem-potent operation we may want to see which workspaces + # have already been processed before: + if self.check_status_file( + payload_section_name=section_name, payload_specific=True, prefix="failure_" + ): + logger.info( + "Found failure file. Trying to reprocess just the failed ones..." + ) + # Read payload from where we left it last time + self._bulk_documents = self.get_status_file( + payload_section_name=section_name, prefix="failure_" + ) + if not self._bulk_documents: logger.error( - "To generate documents for workspaces the workspace type needs to be specified in the payload! Skipping to next document generator..." + "Cannot load existing bulkDocuments failure file. Bailing out!" ) - success = False - continue - workspace_type = doc_generator["workspace_type"] + return False + + success: bool = True + for bulk_document in self._bulk_documents: # Check if element has been disabled in payload (enabled = false). # In this case we skip the element: - if "enabled" in doc_generator and not doc_generator["enabled"]: - logger.info( - "Payload for document generator of workspace type -> %s is disabled. Skipping...", - workspace_type, - ) + if "enabled" in bulk_document and not bulk_document["enabled"]: + logger.info("Payload for Bulk Document is disabled. Skipping...") continue - if not "template_path" in doc_generator: - logger.error( - "To generate documents for workspaces of type -> %s the path to the document template needs to be specified in the payload! 
Skipping to next document generator...", - workspace_type, - ) - success = False - continue - template_path = doc_generator["template_path"] - # 20541 is the ID of the Document Template Volume which - # (we hope) is stable: - template = self._otcs.get_node_by_volume_and_path(20541, template_path) - if not template: - logger.error( - "Cannot find document template in path -> %s. Skipping to next document generator...", - template_path, - ) - success = False - continue - template_id = self._otcs.get_result_value(template, "id") - template_name = self._otcs.get_result_value(template, "name") + copy_data_source = bulk_document.get("copy_data_source", False) + force_reload = bulk_document.get("force_reload", True) - if not "classification_path" in doc_generator: - logger.error( - "To generate documents for workspaces of type -> %s the path to the document classification needs to be specified in the payload! Skipping to next document generator...", - workspace_type, - ) - success = False - continue - classification_path = doc_generator["classification_path"] - classification = self._otcs.get_node_by_volume_and_path( - 198, classification_path - ) - if not classification: - logger.error( - "Cannot find document classification in path -> %s. Skipping to next document generator...", - classification_path, - ) + # The payload element must have a "data_source" key: + if not "data_source" in bulk_document: + logger.error("No data source specified in Bulk Document!") success = False continue - classification_id = self._otcs.get_result_value(classification, "id") + data_source_name = bulk_document["data_source"] - # "category_name" is optional. But if it is specified - # then also "attributes" needs to be specified: - if not "category_name" in doc_generator: + # Load and prepare the data source for the bulk processing: + if copy_data_source: logger.info( - "No metadata (category name) specified in the payload for this document generator.", + "Take a copy of data source -> %s to avoid sideeffects for repeative usage of the data source...", + bulk_document["data_source"], ) - category_name = "" - attributes = {} - category_data = {} - else: - category_name = doc_generator["category_name"] - if not "attributes" in doc_generator: - logger.error( - "To generate documents for workspaces of type -> %s with metadata, the attributes needs to be specified in the payload! Skipping to next document generator...", - workspace_type, - ) - success = False - continue - attributes = doc_generator["attributes"] - - # The following method returns two values: the category ID and - # a dict of the attributes. If the category is not found - # on the document template it returns -1 for the category ID - # and an empty dict for the attribute definitions: - ( - category_id, - attribute_definitions, - ) = self._otcs.get_node_category_definition(template_id, category_name) - if category_id == -1: - logger.error( - "The document template -> %s does not have the specified category -> %s. 
Skipping to next document generator...", - template_name, - category_name, + data = Data( + self.process_bulk_datasource( + data_source_name=data_source_name, force_reload=force_reload ) - success = False - continue - - category_data = {str(category_id): {}} - - # now we fill the prepared (but empty) category_data - # with the actual attribute values from the payload: - for attribute in attributes: - attribute_name = attribute["name"] - attribute_value = attribute["value"] - attribute_type = attribute_definitions[attribute_name]["type"] - attribute_id = attribute_definitions[attribute_name]["id"] - - # Special treatment for type user: determine the ID for the login name. - # the ID is the actual value we have to put in the attribute: - if attribute_type == "user": - user = self._otcs.get_user(attribute_value, show_error=True) - user_id = self._otcs.get_result_value(response=user, key="id") - if not user_id: - logger.error( - "Cannot find user with login name -> %s. Skipping...", - attribute_value, - ) - success = False - continue - attribute_value = user_id - category_data[str(category_id)][attribute_id] = attribute_value - - if not "workspace_folder_path" in doc_generator: - logger.info( - "No workspace folder path defined for workspaces of type -> %s. Documents will be stored in workspace root.", - workspace_type, ) - workspace_folder_path = [] else: - workspace_folder_path = doc_generator["workspace_folder_path"] - - if "exec_as_user" in doc_generator: - exec_as_user = doc_generator["exec_as_user"] - - # Find the user in the users payload: - exec_user = next( - (item for item in self._users if item["name"] == exec_as_user), - None, + data = self.process_bulk_datasource( + data_source_name=data_source_name, force_reload=force_reload ) - # Have we found the user in the payload? - if exec_user is not None: - logger.info( - "Executing document generator with user -> %s", exec_as_user - ) - # we change the otcs credentials to the user: - self._otcs.set_credentials(exec_user["name"], exec_user["password"]) - - # we re-authenticate as the user: - logger.info("Authenticate user -> %s...", exec_as_user) - # True = force new login with new user - cookie = self._otcs.authenticate(revalidate=True) - if not cookie: - logger.error("Couldn't authenticate user -> %s", exec_as_user) - continue - admin_context = False - authenticated_user = exec_as_user - else: - logger.error( - "Cannot find user with login name -> %s for executing. Executing as admin...", - exec_as_user, - ) - admin_context = True - success = False - else: - admin_context = True - exec_as_user = "admin" - - if admin_context and authenticated_user != "admin": - # Set back admin credentials: - self._otcs.set_credentials( - admin_credentials["username"], admin_credentials["password"] + if not data: + logger.error( + "Failed to load data source for bulk documents!", ) + continue - # we re-authenticate as the admin user: + # Check if fields with list substructures should be exploded. + # We may want to do this outside the bulkDatasource to only + # explode for bulkDocuments and not for bulkWorkspaces or + # bulkWorkspaceRelationships: + explosions = bulk_document.get("explosions", []) + for explosion in explosions: + # explode field can be a string or a list + # exploding multiple fields at once avoids + # combinatorial explosions - this is VERY + # different from exploding columns one after the other! 
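The remark about combinatorial explosions is easy to verify with plain pandas, which appears to back the Data class used here: passing a list of columns to a single explode() call (pandas 1.3+) keeps the exploded values row-aligned, while chaining explode() calls builds the cross product:

```python
import pandas as pd

df = pd.DataFrame({"id": [1], "a": [["x", "y"]], "b": [["u", "v"]]})

aligned = df.explode(["a", "b"])      # 2 rows: (x, u), (y, v)
cross = df.explode("a").explode("b")  # 4 rows: (x, u), (x, v), (y, u), (y, v)

print(len(aligned), len(cross))  # 2 4
```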
+ if not "explode_field" in explosion: + logger.error("Missing explosion field(s)!") + continue + explode_field = explosion["explode_field"] + flatten_fields = explosion.get("flatten_fields", []) + split_string_to_list = explosion.get("split_string_to_list", False) + logger.info( + "Starting explosion of bulk documents by field(s) -> %s (type -> %s). Size of data set before explosion -> %s", + explode_field, + str(type(explode_field)), + str(len(data)), + ) + data.explode_and_flatten( + explode_field=explode_field, + flatten_fields=flatten_fields, + make_unique=False, + split_string_to_list=split_string_to_list, + ) + logger.info("Size of data set after explosion -> %s", str(len(data))) + + # Check if duplicate lines for given fields should be removed: + if "unique" in bulk_document and bulk_document["unique"]: + unique_fields = bulk_document["unique"] logger.info( - "Authenticate as admin user -> %s...", admin_credentials["username"] + "Starting deduplication of data set for bulk documents with unique fields -> %s. Size of data set before deduplication -> %s", + str(unique_fields), + str(len(data)), + ) + data.deduplicate(unique_fields=unique_fields, inplace=True) + logger.info( + "Size of data set after deduplication -> %s", str(len(data)) ) - # True = force new login with new user - cookie = self._otcs.authenticate(revalidate=True) - authenticated_user = "admin" - if category_data: + # Sort the data set if "sort" specified in payload. We may want to do this to have a + # higher chance that rows with workspace names that may collapse into + # one name are put into the same partition. This can avoid race conditions + # between different Python threads. + if "sort" in bulk_document and bulk_document["sort"]: + sort_fields = bulk_document["sort"] logger.info( - "Generate documents for workspace type -> %s based on template -> %s with metadata -> %s...", - workspace_type, - template_name, - category_data, + "Start sorting of bulk document data set based on fields (columns) -> %s...", + str(sort_fields), ) - else: + data.sort(sort_fields=sort_fields, inplace=True) logger.info( - "Generate documents for workspace type -> %s based on template -> %s without metadata...", - workspace_type, - template_name, + "Sorting of bulk document data set based on fields (columns) -> %s completed!", + str(sort_fields), ) - # Find the workspace type with the name given in the _workspace_types - # datastructure that has been generated by process_workspace_types() method before: - workspace_type_id = next( - ( - item["id"] - for item in self._workspace_types - if item["name"] == workspace_type - ), - None, - ) - workspace_instances = self._otcs.get_workspace_instances( - type_name=workspace_type, type_id=workspace_type_id - ) - if not workspace_instances or not workspace_instances["results"]: - logger.warning( - "No workspace instances found for workspace type -> %s (%s)", - workspace_type, - workspace_type_id, + # Read name field from payload: + if not "name" in bulk_document or not bulk_document["name"]: + logger.error( + "Bulk Document needs a name field! Skipping to next bulk document..." 
) - for workspace_instance in workspace_instances["results"]: - workspace_id = workspace_instance["data"]["properties"]["id"] - workspace_name = workspace_instance["data"]["properties"]["name"] - if workspace_folder_path: - workspace_folder = self._otcs.get_node_by_workspace_and_path( - workspace_id, workspace_folder_path - ) - if workspace_folder: - workspace_folder_id = self._otcs.get_result_value( - workspace_folder, "id" - ) - else: - # If the workspace template is not matching - # the path we may have an error here. Then - # we fall back to workspace root level. - logger.warning( - "Folder path does not exist in workspace -> %s. Using workspace root level instead...", - workspace_name, - ) - workspace_folder_id = workspace_id - else: - workspace_folder_id = workspace_id + success = False + continue + name_field = bulk_document["name"] - document_name = workspace_name + " - " + template_name - logger.info("Generate document -> %s", document_name) + data_source_name = bulk_document["data_source"] + self._log_header_callback( + text="Process Bulk Documents -> '{}' from data source -> '{}'".format( + name_field, data_source_name + ), + char="-", + ) - response = self._otcs.check_node_name( - int(workspace_folder_id), document_name + # Read optional description field from payload: + description_field = bulk_document.get("description", None) + + if not "categories" in bulk_document or not bulk_document["categories"]: + logger.info( + "Bulk document payload has no category data! Will leave category attributes empty..." ) - if response["results"]: - logger.warning( - "Node with name -> %s does already exist in workspace folder with ID -> %s", - document_name, - workspace_folder_id, + categories = None + else: + categories = bulk_document["categories"] + + # Should existing documents be updated? False (= no) is the default. + enforce_updates = bulk_document.get("enforce_updates", False) + + logger.info( + "Bulk create Documents (name field -> %s. 
Enforce Updates -> %s.)",
+                name_field,
+                str(enforce_updates),
+            )
+
+            bulk_thread_number = int(
+                bulk_document.get("thread_number", BULK_THREAD_NUMBER)
+            )
+
+            partitions = data.partitionate(bulk_thread_number)
+
+            # Create a list to hold the threads:
+            threads = []
+            results = []
+
+            # Define the source OTCS object and authenticate once, then pass it to all workers (if needed):
+            if bulk_document.get("source_type", "URL").lower() == "contentserver":
+                if bulk_document.get("cs_hostname") is None:
+                    source_otcs = None
+                    logger.error(
+                        "Required information for source type ContentServer is not configured -> cs_hostname"
+                    )
+                    continue
+                elif bulk_document.get("cs_username") is None:
+                    source_otcs = None
+                    logger.error(
+                        "Required information for source type ContentServer is not configured -> cs_username"
+                    )
+                    continue
+                elif bulk_document.get("cs_password") is None:
+                    source_otcs = None
+                    logger.error(
+                        "Required information for source type ContentServer is not configured -> cs_password"
+                    )
+                    continue
+                else:
+                    logger.info("Generating reusable OTCS instance for bulk processing...")
+                    source_otcs = OTCS(
+                        protocol=bulk_document.get("cs_protocol", "https"),
+                        hostname=bulk_document.get("cs_hostname"),
+                        port=bulk_document.get("cs_port", "443"),
+                        base_path=bulk_document.get("cs_basepath", "/cs/cs"),
+                        username=bulk_document.get("cs_username"),
+                        password=bulk_document.get("cs_password"),
+                    )
+                    source_otcs.authenticate()
+            else:
+                source_otcs = None
+
+            # Create and start a thread for each partition:
+            for index, partition in enumerate(partitions, start=1):
+                thread = threading.Thread(
+                    name=f"{section_name}_{index:02}",
+                    target=self.thread_wrapper,
+                    args=(
+                        self.process_bulk_documents_worker,
+                        bulk_document,
+                        partition,
+                        name_field,
+                        description_field,
+                        categories,
+                        enforce_updates,
+                        results,
+                        source_otcs,
+                    ),
+                )
+                # start a thread executing the process_bulk_documents_worker() method below:
+                logger.info("Starting Thread -> %s...", str(thread.name))
+                thread.start()
+                threads.append(thread)
+
+            # Wait for all threads to complete:
+            for thread in threads:
+                logger.info("Waiting for Thread -> %s to complete...", str(thread.name))
+                thread.join()
+                logger.info("Thread -> %s has completed.", str(thread.name))
+
+            if "documents" not in bulk_document:
+                bulk_document["documents"] = {}
+            for result in results:
+                if not result["success"]:
+                    logger.info(
+                        "Thread -> %s completed with %s failed, %s skipped, and %s created documents.",
+                        str(result["thread_id"]),
+                        str(result["failure_counter"]),
+                        str(result["skipped_counter"]),
+                        str(result["success_counter"]),
+                    )
+                    success = False
+                else:
+                    logger.info(
+                        "Thread -> %s completed successfully with %s skipped, and %s created documents.",
+                        str(result["thread_id"]),
+                        str(result["skipped_counter"]),
+                        str(result["success_counter"]),
+                    )
+                # Record all generated documents. This allows us to restart
+                # in case of failures and avoid re-uploading documents that
+                # have been successfully uploaded before:
+                bulk_document["documents"].update(result["documents"])

-            if authenticated_user != "admin":
-                # Set back admin credentials:
-                self._otcs.set_credentials(
-                    admin_credentials["username"], admin_credentials["password"]
-                )
-
-                # we authenticate back as the admin user:
-                logger.info(
-                    "Authenticate as admin user -> %s...", admin_credentials["username"]
-                )
-                # True = force new login with new user
-                cookie = self._otcs.authenticate(revalidate=True)
-
-        self.write_status_file(success, section_name, self._doc_generators)
+        self.write_status_file(success, section_name, self._bulk_documents)

         return success

-    # end method definition
+    # end method definition

-    def process_browser_automations(
+    def process_bulk_documents_worker(
         self,
-        browser_automations: list,
-        section_name: str = "browserAutomations",
-        check_status: bool = True,
-    ) -> bool:
-        """Process Selenium-based browser automations.
+        bulk_document: dict,
+        partition: pd.DataFrame,
+        name_field: str,
+        description_field: str,
+        categories: list | None = None,
+        enforce_updates: bool = False,
+        results: list | None = None,
+        source_otcs: OTCS | None = None,
+    ):
+        """This is the thread worker to download + create documents in bulk.
+        Each worker thread gets a partition of the rows that include
+        the data required for the document creation.

         Args:
-            browser_automations (list): list of browser_automations (need this as parameter as we
-                                        have multiple lists)
-            section_name (str, optional): name of the section. It can be overridden
-                                          for cases where multiple sections of same type
-                                          are used (e.g. the "Post" sections)
-                                          This name is also used for the "success" status
-                                          files written to the Admin Personal Workspace
-            check_status (bool, optional): defines whether or not this needs to re-run
-                                           for each customizer run (even if it has been successful before).
-                                           If check_status is True (default) then it is only re-run
-                                           if it has NOT been successfully before.
-        Returns:
-            bool: True if payload has been processed without errors, False otherwise
+            bulk_document (dict): bulkDocument payload element
+            partition (pd.DataFrame): Data partition with rows to process
+            name_field (str): Field where the document name is stored
+            description_field (str): Field where the document description is stored
+            categories (list): list of category dictionaries
+            enforce_updates (bool): should existing documents be updated with a new version and metadata?
+            results (list): mutable list of thread results
+            source_otcs (OTCS): pre-authenticated OTCS object for ContentServer sources
         """

-        if not browser_automations:
-            logger.info("Payload section -> %s is empty. Skipping...", section_name)
-            return True
+        thread_id = threading.get_ident()

-        # If this payload section has been processed successfully before we
-        # can return True and skip processing it once more:
-        if check_status and self.check_status_file(section_name):
-            return True
+        logger.info(
+            "Start working on data set partition of size -> %s to bulk create documents...",
+            str(len(partition)),
+        )

-        success: bool = True
+        result = {}
+        result["thread_id"] = thread_id
+        result["success_counter"] = 0
+        result["failure_counter"] = 0
+        result["skipped_counter"] = 0
+        result["documents"] = {}
+        result["success"] = True
+
+        # Check if documents have been processed before, i.e.
testing + # if a "documents" key exists and if it is pointing to a non-empty list: + # Additionally we check that workspace updates are not enforced: + if bulk_document.get("documents", None) and not enforce_updates: + existing_documents = bulk_document["documents"] + logger.info( + "Found %s already processed documents. Try to complete the job...", + str(len(existing_documents)), + ) + else: + existing_documents = {} + + # See if external creation and modification dates are in the data: + external_modify_date_field = bulk_document.get("external_modify_date", None) + external_create_date_field = bulk_document.get("external_create_date", None) + + # See if we have a key field to uniquely identify an existing document: + key_field = bulk_document.get("key", None) + + # Read "download retry number" and "wait before retry" duration from payload + # (if specified) otherwise set default values + wait_time = bulk_document.get("download_wait_time", 30) + retries = bulk_document.get("download_retries", 2) + + # Get dictionary of replacements for bulk document creations + # this we will be used of all places data is read from the + # data frame. Each dictionary item has the field name as the + # dictionary key and a list of regular expressions as dictionary value + replacements = bulk_document.get("replacements", None) + + # In case the name cannot be resolved we allow to + # specify an alternative name field in the payload. + name_field_alt = bulk_document.get("name_alt", None) + + # If download_name field is not in payload we use name_field instead. + # It can still be that download_name is "" as name_field is only + # used if the entry for "download_name" is not in payload at all. + download_name_field = bulk_document.get("download_name", name_field) + + # Document names are limited in terms of allowed characters. + # In particular we don't want any path elements and "/" dividers + # in the document name. Just everything after the last "/" if + # document_name includes any "/". + document_name_additional_regex_list = [bulk_document.get("name_regex", r".*/")] + + # Fetch the nickname field from the payload (if it is specified): + nickname_field = bulk_document.get("nickname", None) + + # Nicknames are very limited in terms of allowed characters. + # For nicknames we need additional regexp as we need to + # replace all non-alphanumeric, non-space characters with "" + # We also preserve hyphens in the first step to replace + # them below with underscores. This is important to avoid + # that different spellings of names produce different nicknames. + # We want spellings with spaces match spellings with hyphens. + # For this the workspace names have a regexp "-| " in the payload. + nickname_additional_regex_list = [r"[^\w\s-]"] + + # Process all datasets in the partion that was given to the thread: + for index, row in partition.iterrows(): + logger.info( + "Processing data row -> %s for bulk document creation...", + str(index), + ) - for browser_automation in browser_automations: - description = browser_automation.get("description", "") + parent_id = None - # Check if element has been disabled in payload (enabled = false). - # In this case we skip the element: - if "enabled" in browser_automation and not browser_automation["enabled"]: - logger.info( - "Payload for Browser Automation -> %s is disabled. 
Skipping...", - description, + # Check if all data conditions to create the document are met + conditions = bulk_document.get("conditions", None) + if conditions: + evaluated_condition = self.evaluate_conditions(conditions, row) + if not evaluated_condition: + logger.info( + "Document condition for row -> %s not met. Skipping row for document creation...", + str(index), + ) + result["skipped_counter"] += 1 + continue + + document_name = self.replace_bulk_placeholders( + input_string=name_field, + row=row, + replacements=replacements, + additional_regex_list=document_name_additional_regex_list, + ) + # If we cannot get the document_name from the + # name_field we try the alternative name field + # (if specified in payload): + if not document_name and name_field_alt: + logger.debug( + "Row -> %s does not have the data to resolve the placeholders in document name -> %s! Trying alternative name field...", + str(index), + name_field, + ) + document_name = self.replace_bulk_placeholders( + input_string=name_field_alt, + row=row, + replacements=replacements, + additional_regex_list=document_name_additional_regex_list, + ) + if not document_name: + logger.error( + "Row -> %s does not have the data to resolve the placeholders in document name -> %s%s!", + str(index), + name_field, + ( + "nor in alternative name field -> " + name_field_alt + if name_field_alt + else "" + ), ) + result["skipped_counter"] += 1 continue - if not "name" in browser_automation: - logger.error("Browser automation is missing a unique name. Skipping...") - success = False - continue - name = browser_automation.get("name") + download_name = "" + if download_name_field: + download_name = self.replace_bulk_placeholders( + input_string=download_name_field, row=row, replacements=replacements + ) + if not download_name: + logger.warning( + "Download name is empty or row -> %s does not have the data to resolve the placeholders in document download name -> '%s'. Using -> '%s' instead!", + str(index), + download_name_field, + document_name, + ) + # in this case we use the document_name also as the download_name: + download_name = document_name - if description: + # Document names for sure are not allowed to have ":": + document_name = document_name.replace(":", "") + # Truncate the document name to 254 characters which is the maximum allowed length in Extended ECM + if len(document_name) > 254: + document_name = document_name[:254] + + # This is an optimization. We check if the document was created + # in a former run. This helps if the customizer gets re-run: + if document_name and document_name in existing_documents: logger.info( - "Processing Browser Automation -> %s (%s)...", - name, - description, + "Document -> '%s' does already exist and has ID -> %s. Skipping...", + document_name, + existing_documents[document_name], + ) + result["skipped_counter"] += 1 + continue + + if description_field: + description = self.replace_bulk_placeholders( + input_string=description_field, row=row ) + # Truncate the document description to 254 characters which is the maximum allowed length in Extended ECM + # if len(description) > 254: + # description = description[:254] else: - logger.info("Processing Browser Automation -> %s...", name) + description = "" - if not "base_url" in browser_automation: - logger.error("Browser automation is missing base_url. 
Skipping...") - success = False - continue - base_url = browser_automation.get("base_url") + # Determine the external creation field (if any): + if external_create_date_field: + external_create_date = self.replace_bulk_placeholders( + input_string=external_create_date_field, row=row + ) + else: + external_create_date = None - if not "user_name" in browser_automation: - logger.info("Browser automation is not having user name.") - user_name = browser_automation.get("user_name", "") + # Determine the external modification field (if any): + if external_modify_date_field: + external_modify_date = self.replace_bulk_placeholders( + input_string=external_modify_date_field, row=row + ) + else: + external_modify_date = None - if not "password" in browser_automation: - logger.info("Browser automation is not having password.") - password = browser_automation.get("password", "") + # Determine the key field (if any): + if key_field: + key = self.replace_bulk_placeholders(input_string=key_field, row=row) + else: + key = None - if not "automations" in browser_automation: - logger.error( - "Browser automation is missing list of automations. Skipping..." + # check if workspace with this nickname does already exist. + # we also store the nickname to assign it to the new workspace: + if nickname_field: + nickname = self.replace_bulk_placeholders( + input_string=nickname_field, + row=row, + replacements=replacements, + additional_regex_list=nickname_additional_regex_list, ) - success = False - continue - automations = browser_automation.get("automations", []) + else: + nickname = None - debug_automation: bool = browser_automation.get("debug", False) + logger.info( + "Bulk create document -> '%s'...", + document_name, + ) - # Create Selenium Browser Automation: - logger.info("Browser Automation base URL -> %s", base_url) - logger.info("Browser Automation User -> %s", user_name) - logger.debug("Browser Automation Password -> %s", password) - browser_automation_object = BrowserAutomation( - base_url=base_url, - user_name=user_name, - user_password=password, - automation_name=name, - take_screenshots=debug_automation, + download_url = bulk_document.get("download_url", None) + if download_url: + download_url = self.replace_bulk_placeholders( + input_string=download_url, row=row + ) + # Fetch alternative download URL (if avialable) + download_url_alt = bulk_document.get("download_url_alt", None) + + path = bulk_document.get("download_dir", BULK_DOCUMENT_PATH) + path = self.replace_bulk_placeholders(input_string=path, row=row) + + if not os.path.exists(path): + # Path does not exist, create it + os.makedirs(path) + + if not path.endswith("/"): + path += "/" + file_name = path + download_name + file_extension = bulk_document.get("file_extension", "") + file_extension = self.replace_bulk_placeholders( + input_string=file_extension, row=row ) - # Implicit Wait is a global setting (for whole brwoser session) - # This makes sure a page is fully loaded and elements are present - # before accessing them. We set 15.0 seconds as default if not - # otherwise specified by "wait_time" in the payload. - # See https://www.selenium.dev/documentation/webdriver/waits/ - wait_time = browser_automation.get("wait_time", 15.0) - browser_automation_object.implicit_wait(wait_time) - if "wait_time" in browser_automation: + if file_extension: + file_name += "." 
file_extension
+            mime_type = bulk_document.get("mime_type", "application/pdf")
+            mime_type = self.replace_bulk_placeholders(input_string=mime_type, row=row)
+
+            file_extension_alt = bulk_document.get("file_extension_alt", "html")
+            file_name_alt = path + download_name + "." + file_extension_alt
+            mime_type_alt = bulk_document.get("mime_type_alt", "text/html")
+
+            delete_download = bulk_document.get("delete_download", True)
+
+            logger.debug("Download name -> '%s'", download_name)
+            logger.debug("Document name -> '%s'", document_name)
+            logger.debug("Path -> '%s'", path)
+
+            # Add support to find files with wildcards:
+            for _, _, tmpfiles in os.walk(path):
+                for file_data in tmpfiles:
+                    if fnmatch.fnmatch(file_data, download_name):
+                        logger.debug("File name -> %s", file_data)
+                        file_name = path + file_data
+                        continue
+
+            file_exists = os.path.exists(file_name)
+            # make sure there's no name conflict with stale documents:
+            if file_exists and delete_download:
+                os.remove(file_name)
+                file_exists = False
+
+            file_exists_alt = os.path.exists(file_name_alt)
+            # make sure there's no name conflict with stale documents:
+            if file_exists_alt and delete_download:
+                os.remove(file_name_alt)
+                file_exists_alt = False
+
+            if not file_exists and not file_exists_alt:
+
+                source_type = bulk_document.get("source_type", "URL").lower()
+                match source_type:
+
+                    case "contentserver":
+                        # Download the document from a source Extended ECM system:
+                        cs_source_id = bulk_document.get("cs_source_id", "")
+                        cs_source_id = self.replace_bulk_placeholders(
+                            input_string=cs_source_id, row=row
+                        )
+                        if (
+                            source_otcs is not None
+                            and source_otcs.otcs_ticket() is not None
+                        ):
+                            logger.info(
+                                "Downloading document from source Extended ECM (contentserver) with ID -> %s",
+                                cs_source_id,
+                            )
+
+                            if source_otcs.download_document(
+                                node_id=cs_source_id, file_path=file_name
+                            ):
+                                logger.debug(
+                                    "Successfully downloaded from Extended ECM using URL -> %s with ID -> %s to local file -> '%s'",
+                                    source_otcs.cs_public_url,
+                                    cs_source_id,
+                                    file_name,
+                                )
+                            else:
+                                logger.warning(
+                                    "Cannot download file from Extended ECM using URL -> %s with ID -> %s to local file -> '%s'. Skipping...",
+                                    source_otcs.cs_public_url,
+                                    cs_source_id,
+                                    file_name,
+                                )
+                                result["skipped_counter"] += 1
+                                continue
+                        else:
+                            logger.error(
+                                "Cannot download file with ID -> %s from Extended ECM. OTCS object not configured. Skipping...",
+                                cs_source_id,
+                            )
+                            result["skipped_counter"] += 1
+                            continue
+
+                    case "url":
+                        # Default case, download from an accessible URL:
+                        if not download_url:
+                            logger.error(
+                                "Download URL missing and we don't have an existing file in the file system!",
+                            )
+                            result["skipped_counter"] += 1
+                            continue
+                        if not self._http_object.download_file(
+                            url=download_url,
+                            filename=file_name,
+                            retries=retries,
+                            wait_time=wait_time,
+                            wait_on_status=[403],
+                            show_error=False,
+                        ):
+                            # Check if we have an alternative download URL. If so, try it now:
+                            if download_url_alt:
+                                download_url_alt = self.replace_bulk_placeholders(
+                                    input_string=download_url_alt, row=row
+                                )
+                            if download_url_alt:
+                                logger.warning(
+                                    "Cannot download file from -> %s to local file -> '%s'. 
Trying alternative download -> %s to file -> '%s'...",
+                                    download_url,
+                                    file_name,
+                                    download_url_alt,
+                                    file_name_alt,
+                                )
+                                if self._http_object.download_file(
+                                    url=download_url_alt,
+                                    filename=file_name_alt,
+                                    retries=retries,
+                                    wait_time=wait_time,
+                                    wait_on_status=[403],
+                                    show_error=False,
+                                ):
+                                    logger.debug(
+                                        "Successfully downloaded file from alternative URL -> %s to local file -> '%s'. Using this file...",
+                                        download_url_alt,
+                                        file_name_alt,
+                                    )
+                                    mime_type = mime_type_alt
+                                    file_name = file_name_alt
+                                else:
+                                    # as we cannot fully rely on the data source we don't treat this
+                                    # as an error but a warning:
+                                    logger.warning(
+                                        "Cannot download file from alternative URL -> %s to local file -> '%s'. Skipping...",
+                                        download_url_alt,
+                                        file_name_alt,
+                                    )
+                                    result["skipped_counter"] += 1
+                                    continue
+                            else:
+                                # as we cannot fully rely on the data source we don't treat this
+                                # as an error but a warning:
+                                logger.warning(
+                                    "Cannot download file from URL -> %s to local file -> '%s'. Skipping...",
+                                    download_url,
+                                    file_name,
+                                )
+                                result["skipped_counter"] += 1
+                                continue
+                        else:
+                            logger.debug(
+                                "Successfully downloaded file from -> %s to local file -> '%s'",
+                                download_url,
+                                file_name,
+                            )
+
+            else:
+                if file_exists_alt:
+                    file_name = file_name_alt
+                    mime_type = mime_type_alt
                 logger.info(
+                    "Reusing existing file -> '%s' in local storage.",
+                    file_name,
                 )

+            # Now we traverse a list of (multiple) workspaces
+            # the document should be uploaded to:
+            success = True
+            workspaces = bulk_document.get("workspaces", [])
+            for workspace in workspaces:
+                if "workspace_name" not in workspace:
+                    logger.error(
+                        "No workspace name field specified for document upload! Skipping document upload to this workspace...",
+                    )
                     success = False
+                    continue
+                workspace_name_field = workspace["workspace_name"]
+
+                workspace_name = self.replace_bulk_placeholders(
+                    input_string=workspace_name_field,
+                    row=row,
+                    replacements=replacements,
+                )
+                # it could be that the current data row does not have the
+                # required fields to resolve the workspace name placeholders;
+                # then we skip uploading the document to this workspace
+                # but still keep status as successful (don't set success = False)
+                if not workspace_name:
+                    logger.warning(
+                        "Row -> %s does not have the required data to resolve workspace name field -> '%s' specified for document upload! Skipping document upload to this workspace...",
+                        str(index),
+                        workspace_name_field,
+                    )
+                    # success = False - NO, DON'T DO THIS!!!
+                    document_id = None  # do this to avoid fatal error after the main loop where the success counters are set
+                    continue  # for workspace in workspaces
+                # Workspace names for sure are not allowed to have ":":
+                workspace_name = workspace_name.replace(":", "")
+                # Truncate the workspace name to 254 characters which is the maximum allowed length in Extended ECM
+                if len(workspace_name) > 254:
+                    workspace_name = workspace_name[:254]
+
+                # Check if all data conditions to create the workspace are met:
+                conditions = workspace.get("conditions", None)
+                if conditions:
+                    evaluated_condition = self.evaluate_conditions(conditions, row)
+                    if not evaluated_condition:
+                        logger.info(
+                            "Workspace condition for row -> %s not met. 
Skipping row for document upload to workspace...", + str(index), + ) + continue # for workspace in workspaces + + if not "workspace_type" in workspace: + logger.error( + "No workspace type specified for document upload! Skipping workspace -> '%s'...", + workspace_name, + ) + success = False + continue # for workspace in workspaces + workspace_type = workspace["workspace_type"] + workspace_type = self.replace_bulk_placeholders( + input_string=workspace_type, + row=row, + replacements=replacements, + ) + workspace_data_source_name = workspace.get("data_source", None) + # Try to find the workspace by name/synonym and type: + (workspace_id, workspace_name) = self.process_bulk_workspaces_lookup( + workspace_name=workspace_name, + workspace_type=workspace_type, + data_source_name=workspace_data_source_name, + ) + if not workspace_id: + logger.warning( + "Cannot find workspace with name/synonym -> '%s' and type -> '%s'.", + workspace_name, + workspace_type, + ) + success = False + continue # for workspace in workspaces + + # If the workspace payload element has a "data_source" key, + # then add all columns from the given data source to the bulk + # document row to also support the lookup of values from the workspace + # data source. These fields get a "lookup_" prefix to avoid name clashes. + # the values must be specified with this "lookup_" prefix in the payload. + # We CANNOT do this at the very beginning of the workspace loop as we + # need the workspace_name to be properly resolved (incl. synonyms): + if workspace_data_source_name: + logger.info( + "Workspace for bulk documents has a data source -> '%s' with lookup values. Adding them as row columns...", + workspace_data_source_name, + ) + workspace_data_source = next( + ( + item + for item in self._bulk_datasources + if item["name"] == workspace_data_source_name + ), + None, + ) + # Read the synonym column and the name column from the data source payload item: + workspace_data_source_name_column = workspace_data_source.get( + "name_column", None # e.g. "Name" + ) + + if workspace_data_source_name_column: + # Get additional data from workspace datasource + # for lookups. Synonyms are already resolved at + # this point in time (workspace_name has been updated above + # in case the initial workspace name wasn't found) + lookup_row = self.lookup_data_source_value( + data_source=workspace_data_source, + lookup_column=workspace_data_source_name_column, + lookup_value=workspace_name, + ) - match automation_type: - case "login": - page = automation.get("page", "") + # Adding all values of the lookup row with the prefix lookup_ to the bulk documents row + # for replacement of placeholders: + if lookup_row is not None: + for k, value in lookup_row.items(): + row["lookup_" + k] = value + + # "workspace_folder" can be used if the payload contains + # the path as a comma-separated string (top down) + workspace_folder = workspace.get("workspace_folder", "") + + # we need to do a copy as the path list is a mutable data type that we modify below! 
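The copy noted in the comment above matters because the placeholder replacement mutates the list in place; without a per-row copy, the first row's resolved values would leak into every later row. A minimal sketch with a stand-in resolver (the real placeholder syntax is whatever replace_bulk_placeholders_list() defines):

```python
def resolve_in_place(path: list, row: dict) -> None:
    # Stand-in for replace_bulk_placeholders_list(): rewrites each element in place.
    for i, element in enumerate(path):
        path[i] = element.format(**row)  # illustrative placeholder syntax

payload = {"workspace_path": ["Invoices", "{year}"]}

for row in ({"year": "2023"}, {"year": "2024"}):
    workspace_path = list(payload["workspace_path"])  # fresh copy per row
    resolve_in_place(workspace_path, row)
    print(workspace_path)  # ['Invoices', '2023'], then ['Invoices', '2024']

# Without list(...) the first iteration would overwrite the payload itself and
# the second row would see ['Invoices', '2023'] with the placeholder gone.
```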
+ workspace_path = list(workspace.get("workspace_path", [])) + + if workspace_folder and not workspace_path: + workspace_folder = self.replace_bulk_placeholders( + input_string=workspace_folder, + row=row, + replacements=replacements, + ) + if "," in workspace_folder: + workspace_path = [ + item.strip() for item in workspace_folder.split(",") + ] + else: + workspace_path = [workspace_folder] + + if workspace_path: + # Replace placeholders in payload for the path elements: + # Note: workspace_path is a mutable data type that is changed in place! + result_placeholders = self.replace_bulk_placeholders_list( + input_list=workspace_path, + row=row, + replacements=replacements, + ) + if not result_placeholders: + logger.warning( + "Workspace folder path for workspace -> '%s' of workspace type -> '%s' cannot be resolved (placeholder issue). Using workspace root for document upload.", + workspace_name, + workspace_type, + ) + # we put the document into the root of the workspace: + parent_id = workspace_id + workspace_path = None + else: + # Check if the folder path does already exist and get the target folder at the end of the path: logger.info( - "Login to -> %s as user -> %s", base_url + page, user_name + "Check if path -> %s does already exist in workspace -> '%s' (%s)... (otherwise create it)", + str(workspace_path), + workspace_name, + workspace_id, ) - user_field = automation.get("user_field", "otds_username") - password_field = automation.get( - "password_field", "otds_password" + response = self._otcs_frontend.get_node_by_workspace_and_path( + workspace_id=workspace_id, + path=workspace_path, + create_path=True, # we want the path to be created if it doesn't exist + show_error=False, ) - login_button = automation.get("login_button", "loginbutton") - if not browser_automation_object.run_login( - user_field=user_field, - password_field=password_field, - login_button=login_button, - ): + parent_id = self._otcs_frontend.get_result_value(response, "id") + if not parent_id: logger.error( - "Cannot log into -> %s. Stopping automation.", - base_url + page, + "Failed to create path -> %s in workspace -> '%s' (%s)...", + str(workspace_path), + workspace_name, + workspace_id, ) success = False - break + continue # for workspace in workspaces else: logger.info( - "Successfuly logged into page -> %s.", base_url + page + "Successfully created path -> %s in workspace -> '%s' (%s). Node ID for target folder -> %s", + str(workspace_path), + workspace_name, + workspace_id, + str(parent_id), ) - case "get_page": - page = automation.get("page", "") - if not page: - logger.error( - "Automation type -> %s requires page parameter", - automation_type, + # end if workspace_path + else: + logger.info( + "Workspace folder path for workspace -> '%s' of workspace type -> '%s' is not specified. Using workspace root for document upload.", + workspace_name, + workspace_type, + ) + # we put the document into the root of the workspace: + parent_id = workspace_id + + # Check if we have sub-workspaces configured. These are dynamically created + # during the processing of bulk documents... 
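For reference, a workspace element of a bulkDocuments payload that triggers this dynamic sub-workspace creation could look like the following sketch (keys as used in the code, values and placeholder syntax invented for illustration):

```python
workspace = {
    "workspace_name": "{customer_name}",  # resolved per data row
    "workspace_type": "Customer",
    "workspace_path": ["Correspondence", "{year}"],
    # optional: create a sub-workspace per row and upload the document there
    "sub_workspace_type": "Contract",
    "sub_workspace_name": "Contract {contract_number}",
    "sub_workspace_template": "Contract Template",
}
```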
+ if ( + "sub_workspace_type" in workspace + and "sub_workspace_name" in workspace + ): + sub_workspace_type = workspace["sub_workspace_type"] + sub_workspace_type = self.replace_bulk_placeholders( + input_string=sub_workspace_type, + row=row, + replacements=replacements, + ) + sub_workspace_name = workspace["sub_workspace_name"] + sub_workspace_name = self.replace_bulk_placeholders( + input_string=sub_workspace_name, + row=row, + replacements=replacements, + ) + response = self._otcs_frontend.get_node_by_parent_and_name( + name=sub_workspace_name, parent_id=parent_id + ) + # Check if the sub-workspaces does already exist: + sub_workspace_id = self._otcs_frontend.get_result_value( + response, "id" + ) + if not sub_workspace_id: + logger.info( + "Creating sub workspace -> '%s' of type -> '%s' and parent -> %s...", + sub_workspace_name, + sub_workspace_type, + parent_id, + ) + sub_workspace_template = workspace.get( + "sub_workspace_template", "" + ) + + sub_workspace_template = self.replace_bulk_placeholders( + input_string=sub_workspace_template, + row=row, + replacements=replacements, + ) + # Now we try to determine the IDs for the sub-workspace type and template: + (sub_workspace_type_id, sub_workspace_template_id) = ( + self.determine_workspace_type_and_template_id( + workspace_type_name=sub_workspace_type, + workspace_template_name=sub_workspace_template, ) - success = False - break - logger.info("Get page -> %s", base_url + page) - if not browser_automation_object.get_page(url=page): + ) + # if either of the two couldn't be determined we cannot create the sub-workspace + if not sub_workspace_type_id or not sub_workspace_template_id: logger.error( - "Cannot get page -> %s. Stopping automation.", - page, + "Coudn't dertermine workspace template ID and workspace type ID of sub-workspace!", ) success = False - break + continue # for workspace in workspaces + + # Check if we have categories for the sub-workspace: + if not "categories" in workspace: + logger.info( + "Sub-Workspace payload has no category data! Will leave category attributes empty...", + ) + sub_workspace_category_data = {} else: - # browser_automation_object.implicit_wait(15.0) # this is global but not command-specific! Don't need it here! logger.info( - "Successfuly loaded page -> %s.", base_url + page + "Sub-Workspace payload has category data! Will prepare category data for workspace creation...", ) - case "click_elem": - elem = automation.get("elem", "") - if not elem: - logger.error( - "Automation type -> %s requires elem parameter", - automation_type, + worker_categories = self.process_bulk_categories( + row=row, + index=index, + categories=workspace["categories"], + replacements=replacements, ) - success = False - break - find = automation.get("find", "id") - if not browser_automation_object.find_elem_and_click( - find_elem=elem, find_method=find - ): - logger.error( - "Cannot find clickable element -> %s on current page. Stopping automation.", - elem, + logger.info( + "Prepare category data for sub-workspace with template -> %s and parent -> %s", + sub_workspace_template_id, + parent_id, ) - success = False - break - else: - # browser_automation_object.implicit_wait(15.0) # this is global but not command-specific! Don't need it here! 
- logger.info("Successfuly clicked element -> %s.", elem) - case "set_elem": - elem = automation.get("elem", "") - if not elem: - logger.error( - "Automation type -> %s requires elem parameter", - automation_type, + sub_workspace_category_data = ( + self.prepare_workspace_create_form( + categories=worker_categories, + template_id=sub_workspace_template_id, + parent_workspace_node_id=parent_id, + ) ) - success = False - break - find = automation.get("find", "id") - value = automation.get("value", "") - if not value: - logger.error( - "Automation type -> %s requires value parameter", - automation_type, + if not sub_workspace_category_data: + logger.error( + "Failed to prepare the category data for sub-workspace -> '%s'!", + sub_workspace_name, + ) + success = False + continue # for workspace in workspaces + # Now we create the sub-workspace: + response = self._otcs_frontend.create_workspace( + workspace_template_id=sub_workspace_template_id, + workspace_name=sub_workspace_name, + workspace_description="", + workspace_type=sub_workspace_type_id, + category_data=sub_workspace_category_data, + parent_id=parent_id, + show_error=False, + ) + if response is None: + # Potential race condition: see if the sub-workspace has been created by a concurrent thread. + # So we better check if the workspace is there even if the create_workspace() call delivered + # a 'None' response: + response = self._otcs_frontend.get_node_by_parent_and_name( + parent_id=parent_id, name=sub_workspace_name ) - success = False - break - # we also support replacing placeholders that are - # enclosed in double % characters like %%OTCS_RESOURCE_ID%%: - value = self.replace_placeholders(value) - if not browser_automation_object.find_elem_and_set( - find_elem=elem, elem_value=value, find_method=find - ): + sub_workspace_id = self._otcs_frontend.get_result_value( + response, "id" + ) + if not sub_workspace_id: logger.error( - "Cannot find element -> %s on current page to set value -> %s. Stopping automation.", - elem, - value, + "Failed to create sub-workspace -> '%s' with type ID -> %s!", + sub_workspace_name, + sub_workspace_type_id, ) success = False - break + continue # for workspace in workspaces else: logger.info( - "Successfuly set element -> %s to set value -> %s.", - elem, - value, + "Successfully created sub-workspace -> '%s' with ID -> %s", + sub_workspace_name, + sub_workspace_id, ) - case _: - logger.error( - "Illegal automation step type -> %s in browser automation!", - automation_type, - ) - success = False - break - - if check_status: - self.write_status_file(success, section_name, browser_automations) - - return success - - # end method definition - - def init_sap( - self, sap_external_system: dict, direct_application_server_login: bool = True - ) -> SAP | None: - """Initialize SAP object for RFC communication with SAP S/4HANA. 
- - Args: - sap_external_system (dict): SAP external system created before - direct_application_server_login (bool): flag to control wether we comminicate directly with - SAP application server or via a load balancer - Returns: - SAP: SAP object - """ - - if not sap_external_system: - return None - - username = sap_external_system["username"] - password = sap_external_system["password"] - # "external_system_hostname" is extracted from as_url in process_external_systems() - host = sap_external_system["external_system_hostname"] - client = sap_external_system.get("client", "100") - system_number = sap_external_system.get("external_system_number", "00") - system_id = sap_external_system["external_system_name"] - group = sap_external_system.get("group", "PUBLIC") - destination = sap_external_system.get("destination", "") - logger.info("Connection parameters SAP:") - logger.info("SAP Hostname = %s", host) - logger.info("SAP Client = %s", client) - logger.info("SAP System Number = %s", system_number) - logger.info("SAP System ID = %s", system_id) - logger.info("SAP User Name = %s", username) - if not direct_application_server_login: - logger.info("SAP Group Name (for RFC) = %s", group) - if destination: - logger.info("SAP Destination = %s", destination) + # Create Business Relationship between workspace and sub-workspace: + if workspace_id and sub_workspace_id: + # Check if workspace relationship does already exist in Extended ECM + # (this is an additional safety measure to avoid errors): + response = self._otcs_frontend.get_workspace_relationships( + workspace_id=workspace_id, + related_workspace_name=sub_workspace_name, + ) + current_workspace_relationships = ( + self._otcs.exist_result_item( + response, "id", sub_workspace_id + ) + ) + if current_workspace_relationships: + logger.info( + "Workspace relationship between workspace -> '%s' (%s) and sub-workspace -> '%s' (%s) does already exist. Skipping...", + workspace_name, + workspace_id, + sub_workspace_name, + sub_workspace_id, + ) + else: + logger.info( + "Create workspace relationship %s -> %s...", + workspace_id, + sub_workspace_id, + ) + response = self._otcs_frontend.create_workspace_relationship( + workspace_id=workspace_id, + related_workspace_id=sub_workspace_id, + show_error=False, # we don't want to show an error because of race conditions handled below + ) + if response: + logger.info( + "Successfully created Workspace Relationship between workspace ID -> %s and sub-workspace ID -> %s.", + workspace_id, + sub_workspace_id, + ) + else: + # Potential race condition: see if the workspace-2-sub-workspace relationship has been created by a concurrent thread. 
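+                                    # (Several worker threads may process data rows that resolve to the
+                                    # same workspace / sub-workspace pair, so the relationship may appear
+                                    # between the existence check above and this create call.)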
+ # So we better check if the relationship is there even if the create_workspace_relationship() call delivered + # a 'None' response: + response = ( + self._otcs_frontend.get_workspace_relationships( + workspace_id=workspace_id, + related_workspace_name=sub_workspace_name, + ) + ) + current_workspace_relationships = ( + self._otcs.exist_result_item( + response, "id", sub_workspace_id + ) + ) + if not current_workspace_relationships: + logger.error( + "Failed to create Workspace Relationship between workspace ID -> %s and sub-workspace ID -> %s.", + workspace_id, + sub_workspace_id, + ) + else: + logger.info( + "Successfully created Workspace Relationship between workspace ID -> %s and sub-workspace ID -> %s.", + workspace_id, + sub_workspace_id, + ) + + # end if sub-workspace does not exist + else: + logger.info( + "Using existing sub workspace -> '%s' (%s) of type -> '%s'...", + sub_workspace_name, + str(sub_workspace_id), + sub_workspace_type, + ) - if direct_application_server_login: - logger.info("SAP Login = Direct Application Server (ashost)") - sap_object = SAP( - username=username, - password=password, - ashost=host, - client=client, - system_number=system_number, - system_id=system_id, - destination=destination, - ) - else: - logger.info("SAP Login = Logon with load balancing (mshost)") - sap_object = SAP( - username=username, - password=password, - mshost=host, - group=group, - client=client, - system_number=system_number, - system_id=system_id, - destination=destination, - ) + # + # Get the target folder in the sub-workspace by the provided sub workspace path + # - self._sap = sap_object + # "workspace_folder" is deprecated - we keep it for now to be + # backwards compatible: + sub_workspace_folder = workspace.get("sub_workspace_folder", "") + # we need to do a copy as the path list is a mutable data type that we modify below! + sub_workspace_path = list(workspace.get("sub_workspace_path", [])) + if not sub_workspace_path and sub_workspace_folder: + sub_workspace_path = [sub_workspace_folder] + if sub_workspace_path: + # replace placeholders in payload for the path elements: + result_placeholders = self.replace_bulk_placeholders_list( + input_list=sub_workspace_path, + row=row, + replacements=replacements, + ) + if not result_placeholders: + logger.warning( + "Sub-Workspace folder path for workspace -> '%s' of workspace type -> '%s' cannot be resolved (placeholder issue). Using workspace root for document upload.", + workspace_name, + workspace_type, + ) + # we put the document into the root of the workspace: + parent_id = sub_workspace_id + sub_workspace_path = None + else: + # Check if the folder path does already exist and get the target folder at the end of the path: + logger.info( + "Check if path -> %s does already exist in workspace -> '%s' (%s)... (otherwise create it)", + str(sub_workspace_path), + sub_workspace_name, + sub_workspace_id, + ) + response = self._otcs_frontend.get_node_by_workspace_and_path( + workspace_id=sub_workspace_id, + path=sub_workspace_path, + create_path=True, # we want the path to be created if it doesn't exist + show_error=False, + ) + parent_id = self._otcs_frontend.get_result_value( + response, "id" + ) + if not parent_id: + logger.error( + "Failed to create path -> %s in workspace -> '%s' (%s)...", + str(sub_workspace_path), + sub_workspace_name, + sub_workspace_id, + ) + success = False + continue # for workspace in workspaces + else: + logger.info( + "Successfully created path -> %s in sub-workspace -> '%s' (%s). 
Node ID for target folder -> %s", + str(sub_workspace_path), + sub_workspace_name, + sub_workspace_id, + str(parent_id), + ) + else: + logger.info( + "Folder path inside sub-workspace -> '%s' of workspace type -> '%s' is not specified. Using root of sub-workspace for document upload.", + sub_workspace_name, + sub_workspace_type, + ) + # we put the document into the root of the workspace: + parent_id = sub_workspace_id - return sap_object + if sub_workspace_path: + logger.info( + "Check if document -> '%s' is already in target folder -> %s in sub-workspace -> '%s' (parent ID -> %s)...", + document_name, + str(sub_workspace_path), + sub_workspace_name, + parent_id, + ) + else: + logger.info( + "Check if document -> '%s' is already in root of sub-workspace -> '%s' (parent ID -> %s)...", + document_name, + sub_workspace_name, + parent_id, + ) + # end if sub_workspace specified + else: # no sub-workspace used! + sub_workspace_id = None + if workspace_path: + logger.info( + "Check if document -> '%s' is already in target folder -> %s in workspace -> '%s' (parent ID -> %s)...", + document_name, + str(workspace_path), + workspace_name, + parent_id, + ) + else: + logger.info( + "Check if document -> '%s' is already in root of workspace -> '%s' (parent ID -> %s)...", + document_name, + workspace_name, + parent_id, + ) - # end method definition + # + # Create the document in the target folder specified by parent_id: + # - def process_sap_rfcs(self, sap_object: SAP, section_name: str = "sapRFCs") -> bool: - """Process SAP RFCs in payload and run them in SAP S/4HANA. + # check if a document with the same name does already exist: + response = self._otcs_frontend.get_node_by_parent_and_name( + name=document_name, parent_id=parent_id + ) + document_id = self._otcs_frontend.get_result_value(response, "id") - Args: - sap_object (SAP): SAP object - section_name (str, optional): name of the section. It can be overridden - for cases where multiple sections of same type - are used (e.g. the "Post" sections) - This name is also used for the "success" status - files written to the Admin Personal Workspace - Returns: - bool: True if payload has been processed without errors, False otherwise - """ + # Check if the document does exists with an old name + # For this we expect a "key" value to be defined for the + # bulk document and one of the category / attribute item + # to be marked with "is_key" = True. + if not document_id and key: + key_attribute = next( + ( + cat_attr + for cat_attr in categories + if cat_attr.get("is_key", False) is True + ), + None, + ) + if key_attribute: + cat_name = key_attribute.get("name", None) + att_name = key_attribute.get("attribute", None) + logger.info( + "Try to find existing document with the key value -> '%s' in category -> '%s' and attribute -> '%s' in folder with ID -> %s...", + key, + cat_name, + att_name, + parent_id, + ) + response = self._otcs_frontend.lookup_node( + parent_node_id=parent_id, + category=cat_name, + attribute=att_name, + value=key, + ) + document_id = self._otcs_frontend.get_result_value( + response, "id" + ) + else: + logger.error( + "Bulk Document has a key -> '%s' defined but none of the category attributes is marked as a key attribute ('is_key' is missing)!", + key, + ) - if not sap_object: - logger.info("SAP object is undefined. Cannot call RFCs. 
Bailing out.") - return False + document_external_modify_date = self._otcs_frontend.get_result_value( + response, "external_modify_date" + ) + + if not document_id: + # The document does not exist in Extended ECM - so we + # upload it now: + + # If category data is in payload we substitute + # the values with data from the current data row: + if categories: + # Make sure the threads are not changing data structures that + # are shared between threads. categories is a list of dicts. + # list and dicts are "mutable" data structures in Python! + worker_categories = self.process_bulk_categories( + row=row, + index=index, + categories=categories, + replacements=replacements, + ) + document_category_data = self.prepare_category_data( + categories_payload=worker_categories, + source_node_id=parent_id, + ) + # end if categories + else: + document_category_data = {} - # If this payload section has been processed successfully before we - # can return True and skip processing it once more: - if self.check_status_file(section_name): - return True + logger.info( + "Uploading document -> '%s' (file -> '%s', mime type -> '%s', description -> '%s') to parent with ID -> %s. Size -> %s", + document_name, + file_name, + mime_type, + description, + parent_id, + os.path.getsize(file_name), + ) + response = self._otcs_frontend.upload_file_to_parent( + file_url=file_name, + file_name=document_name, + mime_type=mime_type, + parent_id=int(parent_id), + category_data=document_category_data, + description=description, + external_create_date=external_create_date, + external_modify_date=external_modify_date, + show_error=False, + ) + document_id = self._otcs_frontend.get_result_value(response, "id") + if not document_id: + # We may have a race condition here. Double check the document does not yet exist: + response = self._otcs_frontend.get_node_by_parent_and_name( + parent_id=int(parent_id), name=document_name + ) + document_id = self._otcs_frontend.get_result_value( + response, "id" + ) + if not document_id: + if sub_workspace_id: + logger.error( + "Cannot upload document -> '%s' ('%s') to folder path -> '%s' in sub-workspace -> '%s' (parent ID = %s)", + document_name, + download_name, + sub_workspace_path, + sub_workspace_name, + parent_id, + ) + else: + logger.error( + "Cannot upload document -> '%s' ('%s') to folder path -> '%s' in workspace -> '%s' (parent ID = %s)", + document_name, + download_name, + workspace_path, + workspace_name, + parent_id, + ) + success = False + continue + # end if not workspace_id - success: bool = True + # If updates are enforced we update the existing document with + # a new document version and with fresh metadata from the payload: + elif enforce_updates and OTCS.date_is_newer( + date_old=document_external_modify_date, + date_new=external_modify_date, + ): + # If category data is in payload we substitute + # the values with data from the current data row: + if categories: + # Make sure the threads are not changing data structures that + # are shared between threads. categories is a list of dicts. + # list and dicts are "mutable" data structures in Python! 
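+                            # Sketch of the problem this avoids: two worker threads writing their
+                            # row values into one shared categories list would overwrite each
+                            # other's attribute values, so process_bulk_categories() below is
+                            # expected to work on a row-specific copy.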
+ worker_categories = self.process_bulk_categories( + row=row, + index=index, + categories=categories, + replacements=replacements, + ) + document_category_data = self.prepare_category_data( + categories_payload=worker_categories, + source_node_id=document_id, + ) + # end if categories + else: + document_category_data = {} - for sap_rfc in self._sap_rfcs: - rfc_name = sap_rfc["name"] + logger.info( + "Updating existing document -> '%s' (file -> '%s', mime type -> '%s', description -> '%s') with document ID -> %s. Size -> %s", + document_name, + file_name, + mime_type, + description, + document_id, + os.path.getsize(file_name), + ) + response = self._otcs_frontend.add_document_version( + node_id=document_id, + file_url=file_name, + file_name=document_name, + mime_type=mime_type, + description=description, + ) + if not response: + logger.error( + "Failed to add new version to document -> '%s' (%s)", + document_name, + document_id, + ) + success = False + continue + response = self._otcs_frontend.update_item( + node_id=document_id, + parent_id=None, # None = do not move item + item_name=document_name, + item_description=description, + category_data=document_category_data, + external_create_date=external_create_date, + external_modify_date=external_modify_date, + ) + if not response: + logger.error( + "Failed to update metadata of document -> '%s' (%s) with metadata -> %s", + document_name, + document_id, + str(document_category_data), + ) + success = False + continue + # nickname has been calculated for existence test above + # we now assign it to the new document + if nickname: + response = self._otcs_frontend.set_node_nickname( + node_id=document_id, nickname=nickname, show_error=True + ) + if not response: + logger.error( + "Failed to assign nickname -> '%s' to document -> '%s'", + nickname, + document_name, + ) - # Check if element has been disabled in payload (enabled = false). - # In this case we skip the element: - if "enabled" in sap_rfc and not sap_rfc["enabled"]: - logger.info( - "Payload for SAP RFC -> %s is disabled. Skipping...", rfc_name - ) - continue + # end for workspaces - rfc_description = ( - sap_rfc["description"] if sap_rfc.get("description") else "" - ) + if not success: + # check if the parent_id is set. + if parent_id is None: + parent_id = "could not get id" - # be careful to avoid key errors as SAP RFC parameters are optional: - rfc_params = sap_rfc["parameters"] if sap_rfc.get("parameters") else {} - if rfc_params: + logger.error( + "Failed to bulk upload document -> '%s' to parent folder with ID -> %s!", + document_name, + parent_id, + ) + result["success"] = False + result["failure_counter"] += 1 + elif ( + document_id is not None + ): # it can be None if the workspace name failed to resolve logger.info( - "Calling SAP RFC -> %s (%s) with parameters -> %s ...", - rfc_name, - rfc_description, - rfc_params, + "Successfully uploaded bulk document -> '%s' with ID -> %s", + document_name, + document_id, ) + result["success_counter"] += 1 + # Record the workspace name and ID to allow to read it from failure file + # and speedup the process. 
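+                # For illustration, a (made-up) worker result then looks like this:
+                #
+                #   result = {
+                #       "success": True,
+                #       "success_counter": 12,
+                #       "failure_counter": 0,
+                #       "documents": {"Invoice-4711.pdf": 123456},
+                #   }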
+                result["documents"][document_name] = document_id
             else:
                 logger.info(
-                    "Calling SAP RFC -> %s (%s) without parameters...",
-                    rfc_name,
-                    rfc_description,
+                    "Bulk document -> '%s' was not uploaded to any workspace.",
+                    document_name,
                 )
-            # be careful to avoid key errors as SAP RFC parameters are optional:
-            rfc_call_options = (
-                sap_rfc["call_options"] if sap_rfc.get("call_options") else {}
-            )
-            if rfc_call_options:
-                logger.debug("Using call options -> %s ...", rfc_call_options)
+            # Make sure no temp documents are piling up except
+            # when we want them (e.g. if using cloud document storage):
+            if os.path.exists(file_name) and delete_download:
+                os.remove(file_name)

-            result = sap_object.call(rfc_name, rfc_call_options, rfc_params)
-            if result is None:
-                logger.error("Failed to call SAP RFC -> %s", rfc_name)
-                success = False
-            elif result.get("RESULT") != "OK":
-                logger.error(
-                    "Result of SAP RFC -> %s is not OK, it returned -> %s failed items in result -> %s",
-                    rfc_name,
-                    str(result.get("FAILED")),
-                    str(result),
-                )
-                success = False
-            else:
-                logger.info(
-                    "Successfully called RFC -> %s. Result -> %s", rfc_name, str(result)
-                )
-                # Save result for status file content
-                sap_rfc["result"] = result
+        logger.info("End working...")

-        self.write_status_file(success, section_name, self._sap_rfcs)
+        results.append(result)

-        return success
+        return

-    # end method definition
+    # end method definition

-    def init_salesforce(self, salesforce_external_system: dict) -> Salesforce | None:
-        """Initialize Salesforce object for workspace creation. This is needed to query Salesforce API
-        to lookup IDs of Salesforce objects.
+    def replace_bulk_placeholders_list(
+        self,
+        input_list: list,
+        row: pd.Series,
+        index: int = 0,
+        replacements: dict | None = None,
+        additional_regex_list: list | None = None,
+    ) -> bool:
+        """Wrapper method to process a list of payload strings and replace placeholders (see next method).

         Args:
-            salesfoce_external_system (dict): Salesforce external system created before
+            input_list (list): list of strings that contain placeholders
+            row (pd.Series): current row (DataFrame series / row)
+            index (int): Index for use if we encounter a list value
+            replacements (dict): Replacements to apply to given fields (dictionary key = field name)
+            additional_regex_list (list, optional): These are not coming from the payload but dynamically
+                                                    added for special needs like determining the nicknames.
         Returns:
-            Salesforce: Salesforce object
+            bool: True = all replacements worked, False = some replacements had lookup errors
         """

-        if not salesforce_external_system:
-            return None
-
-        username = salesforce_external_system["username"]
-        password = salesforce_external_system["password"]
-        base_url = salesforce_external_system["base_url"]
-        authorization_url = salesforce_external_system.get("token_endpoint", "")
-        client_id = salesforce_external_system["oauth_client_id"]
-        client_secret = salesforce_external_system["oauth_client_secret"]
+        success = True

-        logger.info("Connection parameters Salesforce:")
-        logger.info("Salesforce base URL         = %s", base_url)
-        logger.info("Salesforce authorization URL = %s", base_url)
-        logger.info("Salesforce username         = %s", username)
-        logger.debug("Salesforce password        = %s", password)
-        logger.info("Salesforce client ID        = %s", client_id)
-        logger.debug("Salesforce client secret   = %s", client_secret)
-        salesforce_object = Salesforce(
-            base_url=base_url,
-            client_id=client_id,
-            client_secret=client_secret,
-            username=username,
-            password=password,
-            authorization_url=authorization_url,
-        )
+        for i, value in enumerate(input_list):
+            input_list[i] = self.replace_bulk_placeholders(
+                input_string=value,
+                row=row,
+                index=index,
+                replacements=replacements,
+                additional_regex_list=additional_regex_list,
+            )
+            if not input_list[i]:
+                success = False

-        self._salesforce = salesforce_object
+        return success

-        return salesforce_object
+    # end method definition

-    # end method definition
+    def replace_bulk_placeholders(
+        self,
+        input_string: str,
+        row: pd.Series,
+        index: int | None = 0,
+        replacements: dict | None = None,
+        additional_regex_list: list | None = None,
+    ) -> str:
+        """Replace placeholders like "{variable.subvalue}" in payload of bulk processing.

-    def get_payload(self) -> dict:
-        """Get the Payload"""
-        return self._payload
+        Args:
+            input_string (str): the string to replace placeholders in
+            row (pd.Series): current row (DataFrame series / row)
+            index (int): Index for use if we encounter a list value.
+                         If index is "None" then we return the complete list as value
+            replacements (dict): Replacements to apply to given fields (dictionary key = field name)
+            additional_regex_list (list, optional): These are not coming from the payload but dynamically
+                                                    added for special needs like determining the nicknames.
+        Returns:
+            str: updated string with replacements
+        """

-    def get_users(self) -> list:
-        """Get all useres"""
-        return self._users
+        # XML data sources may include "@" in Pandas column names as well!
+        # This happens if the XML elements have attributes.
+        # pattern = r"\{([\w@]+(\.[\w@]+)*)\}"
+        # pattern = r"\{(\w+(\.\w+)*)\}"
+        # Adjust pattern to allow any sequence of characters within the {...}
+        # pattern = r"\{([^}]*)\}"
+        # non-greedy match of placeholders that are surrounded by curly braces:
+        pattern = r"\{(.*?)\}"
+
+        had_lookup_error = False
+
+        # Define a function to replace placeholders. This
+        # function is called by re.sub() for each pattern match below.
+        def replace(match):
+            # we want to change the variable of the main method
+            nonlocal had_lookup_error
+
+            field_name = match.group(1)
+            # split up the keys at ".", e.g. cm_vehicles.make
+            keys = field_name.split(".")  # Splitting the variable and sub-value
+            # we initialize value with the data frame row (pd.Series):
+            value = row
+            # Walk through the list of keys:
+            for key in keys:
+                # first we access the field in the row and handle the
+                # exception that key may not be a valid column (KeyError):
+                try:
+                    # read the value of the column defined by key
+                    value = value[key]
+                except KeyError as e:
+                    logger.warning(
+                        "KeyError: Cannot replace field -> '%s'%s as the row does not have a column called '%s': %s",
+                        field_name,
+                        " (sub-key -> '{}')".format(key) if key != field_name else "",
+                        field_name,
+                        str(e),
+                    )
+                    had_lookup_error = True
+                    return ""
+                except TypeError as e:
+                    logger.error(
+                        "TypeError: Cannot replace field -> '%s' (value type -> %s). Expecting a dictionary-like value: %s",
+                        field_name,
+                        str(type(value)),
+                        str(e),
+                    )
+                    had_lookup_error = True
+                    return ""
+
+            # if the returned value is a list we use the index parameter
+            # to select the item in the list according to the given index.
+            # We handle the exception that index may be out of range for
+            # the list (IndexError).
+            # If the given index is None we return the whole list. This
+            # is required for multi-value attributes.
+            if isinstance(value, list) and index is not None and len(value) > 0:
+                try:
+                    value = value[index]
+                except IndexError as e:
+                    logger.error(
+                        "Error in replacement of list field -> '%s' using index -> %s (IndexError): %s",
+                        field_name,
+                        str(index),
+                        str(e),
+                    )
+                    had_lookup_error = True
+                    return ""

-    def get_groups(self) -> list:
-        """Get all groups"""
-        return self._groups
+            if isinstance(value, list):
+                if value == []:
+                    had_lookup_error = True
+                    return ""
+            else:
+                if pd.isnull(value):
+                    had_lookup_error = True
+                    return ""
+                value = str(value)
+
+            if replacements and field_name in replacements:
+                # replacements is a dictionary that is defined
+                # in the payload. Each item is a dictionary
+                # that can be looked up by the field name
+                field_replacements = replacements[field_name]
+                upper = field_replacements.get("upper_case", False)
+                lower = field_replacements.get("lower_case", False)
+                regex_list = field_replacements.get("regex_list", [])
+            else:
+                regex_list = []
+                upper = False
+                lower = False
+
+            if additional_regex_list:
+                regex_list = (
+                    regex_list + additional_regex_list
+                )  # don't do an append here as it would modify the original list
+
+            if regex_list or upper or lower:
+                if not isinstance(value, list):
+                    value = self.cleanup_value(
+                        cleanup_value=value,
+                        regex_list=regex_list,
+                        upper=upper,
+                        lower=lower,
+                    )
+                else:
+                    # note: assigning the cleaned value to the loop variable would
+                    # not change the list, so we rebuild the list instead:
+                    value = [
+                        self.cleanup_value(
+                            cleanup_value=v,
+                            regex_list=regex_list,
+                            upper=upper,
+                            lower=lower,
+                        )
+                        for v in value
+                    ]

-    def get_workspaces(self) -> list:
-        """Get all workspaces"""
-        return self._workspaces
+            value = str(value)

-    def get_otcs_frontend(self) -> object:
-        """Get OTCS Frontend oject"""
-        return self._otcs_frontend
+            return value
+        # end sub-method replace()

-    def get_otcs_backend(self) -> object:
-        """Get OTCS Backend object"""
-        return self._otcs_backend
+        # Use re.sub() to replace placeholders using the defined function
+        # replace() - see above.
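+        # Example with made-up row data: for row = {"vehicles": {"make": "Audi"}},
+        # the input string "Make -> {vehicles.make}" becomes "Make -> Audi".
+        # If any placeholder cannot be resolved, replace() sets had_lookup_error
+        # and the whole method returns "" (see below).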
+ result_string = re.sub(pattern, replace, input_string) - def get_otds(self) -> object: - """Get OTDS object""" - return self._otds + if had_lookup_error: + return "" - def get_k8s(self) -> object: - """Get K8s object""" - return self._k8s + return result_string - def getM365(self) -> object: - """Get M365 object""" - return self._m365 + # end method definition - def generate_password( + def cleanup_value( self, - length: int, - use_special_chars: bool = False, - min_special: int = 1, - min_numerical: int = 1, - min_upper: int = 1, - min_lower: int = 1, - override_special: str = None, - ): - """Function to generate random passwords with a given specification + cleanup_value: str, + regex_list: list, + upper: bool = False, + lower: bool = False, + ) -> str: + """Cleanup field values based on regular expressions Args: - length (int): Define password length - use_special_chars (bool, optional): Define if special characters should be used. Defaults to False. - min_special (int, optional): Define min amount of special characters. Defaults to 1. - min_numerical (int, optional): Define if numbers should be used. Defaults to 1. - min_upper (int, optional): Define mininum number of upper case letters. Defaults to 1. - min_lower (int, optional): Define minimum number of lower case letters. Defaults to 1. - override_special (string | None, optional): Define special characters to be used, if not set: !@#$%^&*()_-+=<>?/{}[]. Defaults to None. - - Raises: - ValueError: _description_ - + cleanup_value (str): string to clean up + regex_list (list): list of regular expressions to apply + upper (bool, optional): convert name to upper case letters + lower (bool, optional): convert name to lower case letters Returns: - _type_: _description_ + str: cleaned string """ - # Define character sets - lowercase_letters = string.ascii_lowercase - uppercase_letters = string.ascii_uppercase - numerical_digits = string.digits - special_characters = "!@#$%^&*()_-+=<>?/{}[]" - if override_special: - special_characters = override_special - # Ensure minimum requirements are met + cleaned_string = cleanup_value - if min_special + min_numerical + min_upper + min_lower > length: - raise ValueError("Minimum requirements exceed password length") + if upper: + cleaned_string = cleaned_string.upper() + if lower: + cleaned_string = cleaned_string.lower() - # Initialize the password - password = [] + if regex_list: + try: + for regex in regex_list: + # We use the pipe symbol to divide patterns from replacements + # this is a short-hand syntax to keepit simple. 
If there's
+                    # no pipe in the regex string then we remove the pattern
+                    # from the string.
+                    parts = regex.split("|")
+                    pattern = parts[0]
+                    replacement = ""  # Treat replacement as empty if no pipe specified
+                    if len(parts) > 1:
+                        pattern = r"\b" + pattern + r"\b"  # Match whole words only
+                        replacement = parts[1]
+                    cleaned_string = re.sub(pattern, replacement, cleaned_string)
+            except re.error as e:
+                logger.error(
+                    "Invalid regular expression pattern -> %s",
+                    str(e),
+                )
+                return cleanup_value

-        # Add required characters
-        password.extend(random.sample(lowercase_letters, min_lower))
-        password.extend(random.sample(uppercase_letters, min_upper))
-        password.extend(random.sample(numerical_digits, min_numerical))
+        return cleaned_string

-        if use_special_chars:
-            password.extend(random.sample(special_characters, min_special))
+    # end method definition

-        # Fill the rest of the password with random characters
-        remaining_length = length - len(password)
-        all_chars = lowercase_letters + uppercase_letters + numerical_digits
+    def evaluate_conditions(
+        self, conditions: list, row: pd.Series, replacements: dict | None = None
+    ) -> bool:
+        """Evaluate given conditions for a DataFrame series (i.e. a row).

-        if use_special_chars:
-            all_chars += special_characters
+        Args:
+            conditions (list): list of dictionaries that have a "field" (mandatory)
+                               and a "value" (optional) element.
+            row (pd.Series): current data row to pull data from
+                             (coming from the JSON file)
+            replacements (dict): Replacements to apply to given fields (dictionary key = field name)
+        Returns:
+            bool: True if all given conditions evaluate to True. False otherwise.
+        """

-        password.extend(random.choices(all_chars, k=remaining_length))
+        evaluated_condition = True

-        # Shuffle the password to ensure randomness
-        random.shuffle(password)
+        # We traverse a list of conditions. All conditions must evaluate to true,
+        # otherwise the current workspace or document (i.e. the data set for these objects)
+        # will be skipped:
+        for condition in conditions:
+            field = condition.get("field", None)
+            if not field:
+                logger.error(
+                    "Missing field in condition.",
+                )
+                evaluated_condition = False
+                break
+            field_value = self.replace_bulk_placeholders(
+                input_string=field, row=row, replacements=replacements
+            )
+            logger.debug(
+                "Evaluated field name -> '%s' to '%s'",
+                field,
+                field_value,
+            )
+            # we have 3 options for value:
+            # a) it does not exist in payload - then just the existence of the field is tested
+            # b) it is a string - then we compare it 1:1 with the field value
+            # c) it is a list of strings - then the condition is met if one or more
+            #    of the list values is equal to the field value
+            value = condition.get("value", None)
+            if not value:
+                # if there's no "value" element in the payload
+                # this means that we just check the existence of the field
+                if field_value:
+                    # field does exist and has any non-"" value ==> condition met!
+                    continue
+                else:
+                    # field does not exist ==> condition not met!
+                    evaluated_condition = False
+                    break
+            #
+            # we handle string, boolean, and list values:
+            #
+            if isinstance(value, str) and (value != str(field_value)):
+                logger.debug(
+                    "String value -> '%s' is not equal to field value -> '%s'. Condition not met for field -> '%s'!",
+                    value,
+                    field_value,
+                    field,
+                )
+                evaluated_condition = False
+                break
+            elif isinstance(value, bool) and (
+                str(value) != str(field_value)
+            ):  # don't do bool(field_value) as it returns True for any non-empty string!
+                logger.debug(
+                    "Boolean value -> '%s' is not equal to field value -> '%s'. Condition not met for field -> '%s'!",
+                    value,
+                    field_value,
+                    field,
+                )
+                evaluated_condition = False
+                break
+            elif isinstance(value, list):
+                for value_item in value:
+                    if value_item == field_value:
+                        break
+                else:  # just executed if the for loop is not interrupted by break
+                    logger.debug(
+                        "Value list -> %s does not include field value -> '%s'. Condition not met!",
+                        str(value),
+                        field_value,
+                    )
+                    evaluated_condition = False

-        # Convert the password list to a string
-        final_password = "".join(password)
+        return evaluated_condition

-        return final_password
+    # end method definition
diff --git a/pyxecm/customizer/pht.py b/pyxecm/customizer/pht.py
new file mode 100644
index 0000000..000d81d
--- /dev/null
+++ b/pyxecm/customizer/pht.py
@@ -0,0 +1,503 @@
+"""
+PHT is an OpenText internal application aiming at creating a common naming reference for Engineering Products
+and at tracking all product-related data. It also provides an approved reporting hierarchy.
+See: https://pht.opentext.com
+
+Class: PHT
+Methods:
+
+__init__ : class initializer
+config : Returns config data set
+get_data: Get the Data object that holds all processed PHT products
+request_header: Returns the request header for PHT API calls
+parse_request_response: Parse the REST API responses and convert
+                        them to Python dict in a safe way
+
+authenticate : Authenticates at the PHT API
+
+get_attributes: Get a list of all product attributes (schema) of PHT
+get_business_units: Get the list of PHT Business Units
+get_product_families: Get the list of PHT product families
+get_products: Get the list of PHT products
+get_master_products: Get the list of PHT master products
+filter_products: Get a list of filtered PHT products
+load_products: Load products into a data frame.
+
+"""
+
+__author__ = "Dr. Marc Diefenbruch"
+__copyright__ = "Copyright 2024, OpenText"
+__credits__ = ["Kai-Philip Gatzweiler"]
+__maintainer__ = "Dr. Marc Diefenbruch"
+__email__ = "mdiefenb@opentext.com"
+
+import json
+import logging
+
+import requests
+from requests.auth import HTTPBasicAuth
+from pyxecm.helper.data import Data
+
+logger = logging.getLogger("pyxecm.customizer.pht")
+
+REQUEST_HEADERS = {"Accept": "application/json", "Content-Type": "application/json"}
+
+REQUEST_TIMEOUT = 60
+
+
+class PHT(object):
+    """Used to retrieve data from OpenText PHT."""
+
+    _config: dict
+    _session = None
+
+    def __init__(
+        self,
+        base_url: str,
+        username: str,
+        password: str,
+    ):
+        """Initialize the PHT object
+
+        Args:
+            base_url (str): base URL of PHT
+            username (str): user name in PHT
+            password (str): password of the user
+        """
+
+        pht_config = {}
+
+        # Store the credentials and parameters in a config dictionary:
+        pht_config["baseUrl"] = base_url
+        pht_config["username"] = username
+        pht_config["password"] = password
+
+        pht_config["restUrl"] = pht_config["baseUrl"] + "/api"
+        pht_config["attributeUrl"] = pht_config["restUrl"] + "/attribute"
+        pht_config["businessUnitUrl"] = pht_config["restUrl"] + "/business-unit"
+        pht_config["productFamilyUrl"] = pht_config["restUrl"] + "/product-family"
+        pht_config["productUrl"] = pht_config["restUrl"] + "/product"
+        pht_config["searchUrl"] = pht_config["productUrl"] + "/product/search"
+        pht_config["teamUrl"] = pht_config["restUrl"] + "/team"
+        pht_config["componentUrl"] = pht_config["restUrl"] + "/component"
+        pht_config["masterProductUrl"] = pht_config["restUrl"] + "/master-product"
+
+        self._config = pht_config
+
+        self._session = requests.Session()
+
+        self._data = Data()
+
+    # end method definition
+
+    def config(self) -> dict:
+        """Returns the configuration dictionary
+
+        Returns:
+            dict: Configuration dictionary
+        """
+        return self._config
+
+    # end method definition
+
+    def get_data(self) -> Data:
+        """Get the Data object that holds all processed PHT products
+
+        Returns:
+            Data: Datastructure with all processed PHT product data.
+        """
+
+        return self._data
+
+    # end method definition
+
+    def request_header(self, content_type: str = "") -> dict:
+        """Returns the request header used for PHT API calls.
+        Consists of the Accept and Content-Type headers
+        (authentication is handled via the session's basic auth).
+
+        Args:
+            content_type (str, optional): custom content type for the request
+        Return:
+            dict: request header values
+        """
+
+        # copy the defaults so a custom content type does not
+        # mutate the module-level REQUEST_HEADERS dictionary:
+        request_header = dict(REQUEST_HEADERS)
+
+        if content_type:
+            request_header["Content-Type"] = content_type
+
+        return request_header
+
+    # end method definition
+
+    def parse_request_response(
+        self,
+        response_object: requests.Response,
+        additional_error_message: str = "",
+        show_error: bool = True,
+    ) -> list | None:
+        """Converts the request response (JSON) to a Python list in a safe way
+        that also handles exceptions. It first tries to load the response.text
+        via json.loads() that produces a dict output. Only if response.text is
+        not set or is empty it just converts the response_object to a dict using
+        the vars() built-in method.
+ + Args: + response_object (object): this is reponse object delivered by the request call + additional_error_message (str, optional): use a more specific error message + in case of an error + show_error (bool): True: write an error to the log file + False: write a warning to the log file + Returns: + list: response information or None in case of an error + """ + + if not response_object: + return None + + try: + if response_object.text: + list_object = json.loads(response_object.text) + else: + list_object = vars(response_object) + except json.JSONDecodeError as exception: + if additional_error_message: + message = "Cannot decode response as JSON. {}; error -> {}".format( + additional_error_message, exception + ) + else: + message = "Cannot decode response as JSON; error -> {}".format( + exception + ) + if show_error: + logger.error(message) + else: + logger.warning(message) + return None + else: + return list_object + + # end method definition + + def authenticate(self) -> str | None: + """Authenticate at PHT with basic authentication.""" + + self._session.headers.update(self.request_header()) + + username = self.config()["username"] + password = self.config()["password"] + if not self._session: + self._session = requests.Session() + self._session.auth = HTTPBasicAuth(username, password) + + return self._session.auth + + # end method definition + + def get_attributes(self) -> list | None: + """Get a list of all product attributes (schema) of PHT + + Returns: + list | None: list of product attributes + + Example: + [ + { + 'id': 28, + 'uuid': '43ba5852-eb83-11ed-a752-00505682262c', + 'name': 'UBM SCM Migration JIRA/ValueEdge', + 'description': 'Identifies the Issue to track work for the SCM migration for this project.\nIts a free text field and no validation with JIRA/ValueEdge will take place', + 'type': 'TEXT', + 'attributeCategory': { + 'id': 2, + 'name': 'Auxiliary assignment' + }, + 'showDefault': False, + 'restricted': True, + 'allowScopeChain': True, + 'visibleToAll': False, + 'deleted': False, + 'attributeOptions': [], + 'attributeScopes': [], + 'allowedTeams': [] + } + ] + """ + + request_header = self.request_header() + request_url = self.config()["attributeUrl"] + + retries = 0 + + while True: + response = self._session.get(url=request_url, headers=request_header) + if response.ok: + return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate() + retries += 1 + else: + logger.error( + "Failed to get PHT attributes; error -> %s (%s)", + response.text, + response.status_code, + ) + return None + + # end method definition + + def get_business_units(self) -> list | None: + """Get the list of PHT Business Units + + Returns: + list | None: list of the known business units. 
+ + Example: + [ + { + 'id': 1, + 'name': 'Content Services', + 'leaderModel': { + 'id': 219, + 'domain': 'mcybala', + 'email': 'mcybala@opentext.com', + 'name': 'Michael Cybala', + 'role': None, + 'status': 'ACTIVE', + 'location': 'Kempten, DEU', + 'title': 'VP, Software Engineering', + 'type': 'OTHERS' + }, + 'pmLeaderModel': { + 'id': 350, + 'domain': 'mdiefenb', + 'email': 'mdiefenb@opentext.com', + 'name': 'Marc Diefenbruch', + 'role': None, + 'status': 'ACTIVE', + 'location': 'Virtual, DEU', + 'title': 'VP, Product Management', + 'type': 'OTHERS' + }, + 'sltOwnerModel': { + 'id': 450, + 'domain': 'jradko', + 'email': 'jradko@opentext.com', + 'name': 'John Radko', + 'role': None, + 'status': 'ACTIVE', + 'location': 'Gaithersburg, MD, USA', + 'title': 'SVP, Software Engineering', + 'type': 'OTHERS' + }, + 'status': 'ACTIVE', + 'engineering': True, + 'attributes': [{...}, {...}, {...}, {...}, {...}, {...}, {...}, {...}, {...}], + 'leader': 'Michael Cybala', + 'leaderDomain': 'mcybala', + 'pmLeader': 'Marc Diefenbruch', + 'pmLeaderDomain': 'mdiefenb', + 'sltOwner': 'John Radko', + 'sltOwnerDomain': 'jradko' + } + ] + """ + + request_header = self.request_header() + request_url = self.config()["businessUnitUrl"] + + retries = 0 + + while True: + response = self._session.get(url=request_url, headers=request_header) + if response.ok: + return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate() + retries += 1 + else: + logger.error( + "Failed to get PHT business units; error -> %s (%s)", + response.text, + response.status_code, + ) + return None + + # end method definition + + def get_product_families(self) -> list | None: + """Get the list of PHT product families + + Returns: + list | None: list of the known product families. + """ + + request_header = self.request_header() + request_url = self.config()["productFamilyUrl"] + + retries = 0 + + while True: + response = self._session.get(url=request_url, headers=request_header) + if response.ok: + return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate() + retries += 1 + else: + logger.error( + "Failed to get PHT product families; error -> %s (%s)", + response.text, + response.status_code, + ) + return None + + # end method definition + + def get_products(self) -> list | None: + """Get the list of PHT products + + Returns: + list | None: list of the known products. 
+        """
+
+        request_header = self.request_header()
+        request_url = self.config()["productUrl"]
+
+        retries = 0
+
+        while True:
+            response = self._session.get(url=request_url, headers=request_header)
+            if response.ok:
+                return self.parse_request_response(response)
+            # Check if Session has expired - then re-authenticate and try once more
+            elif response.status_code == 401 and retries == 0:
+                logger.debug("Session has expired - try to re-authenticate...")
+                self.authenticate()
+                retries += 1
+            else:
+                logger.error(
+                    "Failed to get PHT products; error -> %s (%s)",
+                    response.text,
+                    response.status_code,
+                )
+                return None
+
+    # end method definition
+
+    def get_master_products(self) -> list | None:
+        """Get the list of PHT master products
+
+        Returns:
+            list | None: list of the known master products.
+        """
+
+        request_header = self.request_header()
+        request_url = self.config()["masterProductUrl"]
+
+        retries = 0
+
+        while True:
+            response = self._session.get(url=request_url, headers=request_header)
+            if response.ok:
+                return self.parse_request_response(response)
+            # Check if Session has expired - then re-authenticate and try once more
+            elif response.status_code == 401 and retries == 0:
+                logger.debug("Session has expired - try to re-authenticate...")
+                self.authenticate()
+                retries += 1
+            else:
+                logger.error(
+                    "Failed to get PHT master products; error -> %s (%s)",
+                    response.text,
+                    response.status_code,
+                )
+                return None
+
+    # end method definition
+
+    def filter_products(self, filter_definition: dict | None = None) -> list | None:
+        """Get a list of filtered PHT products
+
+        Args:
+            filter_definition (dict): a dictionary of filter conditions.
+            Example filters:
+                businessUnitName:
+                productFamilyName:
+                productName:
+                productSyncId:
+                productStatus: ACTIVE | INACTIVE | MAINTENANCE
+                productManager:
+                developmentManager:
+                attributeOperator: AND | OR
+                attributes: {
+                    "": {
+                        "compare": CONTAINS | EXISTS | DOES_NOT_EXISTS,
+                        "values": List
+                    },
+                    ...
+                },
+                includeAttributes: true | false
+        Returns:
+            list | None: list of matching products.
+        """
+
+        if not filter_definition:
+            return self.get_products()
+
+        request_header = self.request_header()
+        request_url = self.config()["productUrl"] + "/filtered"
+        request_data = filter_definition
+
+        retries = 0
+
+        while True:
+            response = self._session.post(
+                url=request_url, headers=request_header, json=request_data
+            )
+            if response.ok:
+                return self.parse_request_response(response)
+            # Check if Session has expired - then re-authenticate and try once more
+            elif response.status_code == 401 and retries == 0:
+                logger.debug("Session has expired - try to re-authenticate...")
+                self.authenticate()
+                retries += 1
+            else:
+                logger.error(
+                    "Failed to get filtered PHT products; error -> %s (%s)",
+                    response.text,
+                    response.status_code,
+                )
+                return None
+
+    # end method definition
+
+    def load_products(self, product_list: list | None = None) -> bool:
+        """Load products into a data frame in the self._data object
+
+        Args:
+            product_list (list, optional): list of products - if already available. Defaults to None.
+
+        Returns:
+            bool: True if successful, False otherwise.
+ """ + + if not product_list: + product_list = self.get_products() + + self._data = Data(product_list) + + if self._data: + return True + + return False + + # end method definition diff --git a/pyxecm/customizer/salesforce.py b/pyxecm/customizer/salesforce.py index a9a752d..cbb7eb7 100644 --- a/pyxecm/customizer/salesforce.py +++ b/pyxecm/customizer/salesforce.py @@ -1,5 +1,6 @@ """ Salesforce Module to interact with the Salesforce API +See: https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/intro_rest.htm Class: Salesforce Methods: @@ -14,15 +15,29 @@ of the Salesforce API call get_result_value: Check if a defined value (based on a key) is in the Salesforce API response -authenticate : Authenticates at Salesforce API - -get_user: Get a Salesforce user based on its ID. -add_user: Add a new Salesforce user. +authenticate: Authenticates at Salesforce API +get_object_id_by_name: Get the ID of a given Salesforce object with a given type and name get_object: Get a Salesforce object based on a defined field value and return selected result fields. add_object: Add object to Salesforce. This is a generic wrapper method for the actual add methods. + +get_group: Get a Salesforce group based on its ID. +add_group: Add a new Salesforce group. +update_group: Update a Salesforce group. +get_group_members: Get Salesforce group members +add_group_member: Add a user or group to a Salesforce group + +get_all_user_profiles: Get all user profiles +get_user_profile_id: Get a user profile ID by profile name +get_user_id: Get a user ID by user name +get_user: Get a Salesforce user based on its ID. +add_user: Add a new Salesforce user. +update_user: Update a Salesforce user. +update_user_password: Update the password of a Salesforce user. +update_user_photo: update the Salesforce user photo. + add_account: Add a new Account object to Salesforce. add_product: Add a new Product object to Salesforce. add_opportunity: Add a new Opportunity object to Salesfoce. @@ -38,6 +53,7 @@ __maintainer__ = "Dr. Marc Diefenbruch" __email__ = "mdiefenb@opentext.com" +import os import json import logging @@ -46,12 +62,13 @@ logger = logging.getLogger("pyxecm.customizer.salesforce") -request_login_headers = { +REQUEST_LOGIN_HEADERS = { "Content-Type": "application/x-www-form-urlencoded", "Accept": "application/json", } REQUEST_TIMEOUT = 60 +SALESFORCE_API_VERSION = "v60.0" class Salesforce(object): """Used to retrieve and automate stettings in Salesforce.""" @@ -84,21 +101,56 @@ def __init__( security_token (str, optional): security token for Salesforce login """ + # The instance URL is also returned by the authenticate call + # but typically it is identical to the base_url. 
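+        # (Salesforce delivers it as "instance_url" in the OAuth token
+        # response; base_url just serves as the initial value here.)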
+ self._instance_url = base_url + salesforce_config = {} - # Set the authentication endpoints and credentials - salesforce_config["baseUrl"] = base_url + # Store the credentials and parameters in a config dictionary: salesforce_config["clientId"] = client_id salesforce_config["clientSecret"] = client_secret salesforce_config["username"] = username salesforce_config["password"] = password salesforce_config["securityToken"] = security_token + + # Set the Salesforce URLs and REST API endpoints: + salesforce_config["baseUrl"] = base_url + salesforce_config["objectUrl"] = salesforce_config[ + "baseUrl" + ] + "/services/data/{}/sobjects/".format(SALESFORCE_API_VERSION) + salesforce_config["queryUrl"] = salesforce_config[ + "baseUrl" + ] + "/services/data/{}/query/".format(SALESFORCE_API_VERSION) + salesforce_config["compositeUrl"] = salesforce_config[ + "baseUrl" + ] + "/services/data/{}/composite/".format(SALESFORCE_API_VERSION) + salesforce_config["connectUrl"] = salesforce_config[ + "baseUrl" + ] + "/services/data/{}/connect/".format(SALESFORCE_API_VERSION) + salesforce_config["toolingUrl"] = salesforce_config[ + "baseUrl" + ] + "/services/data/{}/tooling/".format(SALESFORCE_API_VERSION) if authorization_url: salesforce_config["authenticationUrl"] = authorization_url else: salesforce_config["authenticationUrl"] = ( salesforce_config["baseUrl"] + "/services/oauth2/token" ) + # URLs that are based on the objectURL (sobjects/): + salesforce_config["userUrl"] = salesforce_config["objectUrl"] + "User/" + salesforce_config["groupUrl"] = salesforce_config["objectUrl"] + "Group/" + salesforce_config["groupMemberUrl"] = ( + salesforce_config["objectUrl"] + "GroupMember/" + ) + salesforce_config["accountUrl"] = salesforce_config["objectUrl"] + "Account/" + salesforce_config["productUrl"] = salesforce_config["objectUrl"] + "Product2/" + salesforce_config["opportunityUrl"] = ( + salesforce_config["objectUrl"] + "Opportunity/" + ) + salesforce_config["caseUrl"] = salesforce_config["objectUrl"] + "Case/" + salesforce_config["assetUrl"] = salesforce_config["objectUrl"] + "Asset/" + salesforce_config["contractUrl"] = salesforce_config["objectUrl"] + "Contract/" # Set the data for the token request salesforce_config["authenticationData"] = { @@ -111,6 +163,8 @@ def __init__( self._config = salesforce_config + # end method definition + def config(self) -> dict: """Returns the configuration dictionary @@ -143,8 +197,10 @@ def request_header(self, content_type: str = "application/json") -> dict: request_header = { "Authorization": "Bearer {}".format(self._access_token), - "Content-Type": content_type, } + if content_type: + request_header["Content-Type"] = content_type + return request_header # end method definition @@ -278,16 +334,16 @@ def authenticate(self, revalidate: bool = False) -> str | None: # Already authenticated and session still valid? 
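+        # (callers can force a fresh token with revalidate=True,
+        # e.g. after receiving a 401 from the API)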
if self._access_token and not revalidate: - logger.info( + logger.debug( "Session still valid - return existing access token -> %s", str(self._access_token), ) return self._access_token request_url = self.config()["authenticationUrl"] - request_header = request_login_headers + request_header = REQUEST_LOGIN_HEADERS - logger.info("Requesting Salesforce Access Token from -> %s", request_url) + logger.debug("Requesting Salesforce Access Token from -> %s", request_url) authenticate_post_body = self.credentials() @@ -346,18 +402,17 @@ def get_object_id_by_name( """ if not self._access_token or not self._instance_url: - logger.error("Authentication required.") - return None + self.authenticate() request_header = self.request_header() - request_url = f"{self._instance_url}/services/data/v52.0/query/" + request_url = self.config()["queryUrl"] query = f"SELECT Id FROM {object_type} WHERE {name_field} = '{name}'" retries = 0 while True: response = requests.get( - request_url, + url=request_url, headers=request_header, params={"q": query}, timeout=REQUEST_TIMEOUT, @@ -367,13 +422,13 @@ def get_object_id_by_name( object_id = self.get_result_value(response, "Id") return object_id elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 else: logger.error( - "Failed to get Salesforce object ID for object type -> %s and object name -> %s; status -> %s; error -> %s", + "Failed to get Salesforce object ID for object type -> '%s' and object name -> '%s'; status -> %s; error -> %s", object_type, name, response.status_code, @@ -383,36 +438,6 @@ def get_object_id_by_name( # end method definition - def get_profile_id(self, profile_name: str) -> Optional[str]: - """Get a user profile ID by profile name. - - Args: - profile_name (str): Name of the User Profile. - - Returns: - Optional[str]: Technical ID of the user profile. - """ - - return self.get_object_id_by_name(object_type="Profile", name=profile_name) - - # end method definition - - def get_user_id(self, username: str) -> Optional[str]: - """Get a user ID by user name. - - Args: - username (str): Name of the User. - - Returns: - Optional[str]: Technical ID of the user - """ - - return self.get_object_id_by_name( - object_type="User", name=username, name_field="Username" - ) - - # end method definition - def get_object( self, object_type: str, @@ -433,11 +458,33 @@ def get_object( Returns: dict | None: Dictionary with the Salesforce object data. + + Example response: + { + 'totalSize': 2, + 'done': True, + 'records': [ + { + 'attributes': { + 'type': 'Opportunity', + 'url': '/services/data/v60.0/sobjects/Opportunity/006Dn00000EclybIAB' + }, + 'Id': '006Dn00000EclybIAB' + }, + { + 'attributes': { + 'type': 'Opportunity', + 'url': '/services/data/v60.0/sobjects/Opportunity/006Dn00000EclyfIAB' + }, + 'Id': '006Dn00000EclyfIAB' + } + ] + } """ if not self._access_token or not self._instance_url: - logger.error("Authentication required.") - return None + self.authenticate() + if search_field and not search_value: logger.error( "No search value has been provided for search field -> %s!", @@ -445,7 +492,7 @@ def get_object( ) return None if not result_fields: - logger.info( + logger.debug( "No result fields defined. Using 'FIELDS(STANDARD)' to deliver all standard fields of the object." 
) result_fields = ["FIELDS(STANDARD)"] @@ -456,19 +503,21 @@ def get_object( query += " LIMIT {}".format(str(limit)) request_header = self.request_header() - request_url = f"{self._instance_url}/services/data/v52.0/query/?q={query}" + request_url = self.config()["queryUrl"] + "?q={}".format(query) - logger.info( + logger.debug( "Sending query -> %s to Salesforce; calling -> %s", query, request_url ) retries = 0 while True: - response = requests.get(request_url, headers=request_header, timeout=30) + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) if response.ok: return self.parse_request_response(response) elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -491,9 +540,10 @@ def add_object(self, object_type: str, **kwargs: Any) -> dict | None: Args: object_type (str): Type of the Salesforce business object, like "Account" or "Case". + **kwargs (dict): keyword / value ictionary with additional parameters Returns: - dict | None: Dictionary with the Salesforce Case data or None if the request fails. + dict | None: Dictionary with the Salesforce object data or None if the request fails. """ match object_type: @@ -568,27 +618,40 @@ def add_object(self, object_type: str, **kwargs: Any) -> dict | None: # end method definition - def get_user(self, user_id: str) -> dict | None: - """Get a Salesforce user based on its ID. + def get_group_id(self, groupname: str) -> Optional[str]: + """Get a group ID by group name. Args: - user_id (str): ID of the Salesforce user + groupname (str): Name of the Group. Returns: - dict | None: Dictionary with the Salesforce user data or None if the request fails. + Optional[str]: Technical ID of the group + """ + + return self.get_object_id_by_name( + object_type="Group", name=groupname, name_field="Name" + ) + + # end method definition + + def get_group(self, group_id: str) -> dict | None: + """Get a Salesforce group based on its ID. + + Args: + group_id (str): ID of the Salesforce group + + Returns: + dict | None: Dictionary with the Salesforce group data or None if the request fails. 
""" if not self._access_token or not self._instance_url: - logger.error("Authentication required.") - return None + self.authenticate() request_header = self.request_header() - request_url = ( - f"{self._instance_url}/services/data/v52.0/sobjects/User/{user_id}" - ) + request_url = self.config()["groupUrl"] + group_id - logger.info( - "Get Salesforce user with ID -> %s; calling -> %s", user_id, request_url + logger.debug( + "Get Salesforce group with ID -> %s; calling -> %s", group_id, request_url ) retries = 0 @@ -599,14 +662,254 @@ def get_user(self, user_id: str) -> dict | None: if response.ok: return self.parse_request_response(response) elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 else: logger.error( - "Failed to get Salesforce user -> %s; status -> %s; error -> %s", - user_id, + "Failed to get Salesforce group -> %s; status -> %s; error -> %s", + group_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def add_group( + self, + group_name: str, + group_type: str = "Regular", + ) -> dict | None: + """Add a new Salesforce group. + + Args: + group_name (str): Name of the new Salesforce group + + Returns: + dict | None: Dictionary with the Salesforce Group data or None if the request fails. + + Example response: + { + 'id': '00GDn000000KWE0MAO', + 'success': True, + 'errors': [] + } + """ + + if not self._access_token or not self._instance_url: + self.authenticate() + + request_header = self.request_header() + request_url = self.config()["groupUrl"] + + payload = {"Name": group_name, "Type": group_type} + + logger.debug( + "Adding Salesforce group -> %s; calling -> %s", group_name, request_url + ) + + retries = 0 + while True: + response = requests.post( + request_url, + headers=request_header, + data=json.dumps(payload), + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + request_header = self.request_header() + retries += 1 + else: + logger.error( + "Failed to add Salesforce group -> %s; status -> %s; error -> %s", + group_name, + response.status_code, + response.text, + ) + return None + + # end method definition + + def update_group( + self, + group_id: str, + update_data: dict, + ) -> dict: + """Update a Salesforce group. + + Args: + group_id (str): The Salesforce group ID. + update_data (dict): Dictionary containing the fields to update. + + Returns: + dict: Response from the Salesforce API. 
+ """ + + if not self._access_token or not self._instance_url: + self.authenticate() + + request_header = self.request_header() + + request_url = self.config()["groupUrl"] + group_id + + logger.debug( + "Update Salesforce group with ID -> %s; calling -> %s", + group_id, + request_url, + ) + + retries = 0 + while True: + response = requests.patch( + request_url, + json=update_data, + headers=request_header, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + request_header = self.request_header() + retries += 1 + else: + logger.error( + "Failed to update Salesforce group -> %s; status -> %s; error -> %s", + group_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def get_group_members(self, group_id: str) -> list | None: + """Get Salesforce group members + + Args: + group_id (str): Id of the group to retrieve the members + + Returns: + list | None: result + + Example response: + { + 'totalSize': 1, + 'done': True, + 'records': [ + { + 'attributes': { + 'type': 'GroupMember', + 'url': '/services/data/v60.0/sobjects/GroupMember/011Dn000000ELhwIAG' + }, + 'UserOrGroupId': '00GDn000000KWE5MAO' + } + ] + } + """ + + if not self._access_token or not self._instance_url: + self.authenticate() + + request_header = self.request_header() + + request_url = self.config()["queryUrl"] + + query = f"SELECT UserOrGroupId FROM GroupMember WHERE GroupId = '{group_id}'" + params = {"q": query} + + logger.debug( + "Get members of Salesforce group with ID -> %s; calling -> %s", + group_id, + request_url, + ) + + retries = 0 + while True: + response = requests.get( + request_url, + headers=request_header, + params=params, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + request_header = self.request_header() + retries += 1 + else: + logger.error( + "Failed to retrieve members of Salesforce group with ID -> %s; status -> %s; error -> %s", + group_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def add_group_member(self, group_id: str, member_id: str) -> dict | None: + """Add a user or group to a Salesforce group + + Args: + group_id (str): ID of the Salesforce Group to add member to. + member_id (str): ID of the user or group. + + Returns: + dict | None: Dictionary with the Salesforce membership data or None if the request fails. 
+ + Example response (id is the membership ID): + { + 'id': '011Dn000000ELhwIAG', + 'success': True, + 'errors': [] + } + """ + + if not self._access_token or not self._instance_url: + self.authenticate() + + request_url = self.config()["groupMemberUrl"] + + request_header = self.request_header() + + payload = {"GroupId": group_id, "UserOrGroupId": member_id} + + logger.debug( + "Add member with ID -> %s to Salesforce group with ID -> %s; calling -> %s", + member_id, + group_id, + request_url, + ) + + retries = 0 + while True: + response = requests.post( + request_url, + headers=request_header, + json=payload, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + request_header = self.request_header() + retries += 1 + else: + logger.error( + "Failed to add member with ID -> %s to Salesforce group with ID -> %s; status -> %s; error -> %s", + member_id, + group_id, response.status_code, response.text, ) @@ -648,11 +951,10 @@ def get_all_user_profiles(self) -> dict | None: """ if not self._access_token or not self._instance_url: - logger.error("Authentication required.") - return None + self.authenticate() request_header = self.request_header() - request_url = f"{self._instance_url}/services/data/v52.0/query/" + request_url = self.config()["queryUrl"] query = "SELECT Id, Name, CreatedById, CreatedDate, Description, LastModifiedById, LastModifiedDate, PermissionsCustomizeApplication, PermissionsEditTask, PermissionsImportLeads FROM Profile" @@ -667,7 +969,7 @@ def get_all_user_profiles(self) -> dict | None: if response.ok: return self.parse_request_response(response) elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -681,27 +983,112 @@ def get_all_user_profiles(self) -> dict | None: # end method definition + def get_user_profile_id(self, profile_name: str) -> Optional[str]: + """Get a user profile ID by profile name. + + Args: + profile_name (str): Name of the User Profile. + + Returns: + Optional[str]: Technical ID of the user profile. + """ + + return self.get_object_id_by_name(object_type="Profile", name=profile_name) + + # end method definition + + def get_user_id(self, username: str) -> Optional[str]: + """Get a user ID by user name. + + Args: + username (str): Name of the User. + + Returns: + Optional[str]: Technical ID of the user + """ + + return self.get_object_id_by_name( + object_type="User", name=username, name_field="Username" + ) + + # end method definition + + def get_user(self, user_id: str) -> dict | None: + """Get a Salesforce user based on its ID. + + Args: + user_id (str): ID of the Salesforce user + + Returns: + dict | None: Dictionary with the Salesforce user data or None if the request fails.
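+ + Example (a usage sketch, not part of the original code; the ID is made up and would typically be resolved via get_user_id()): + + user = salesforce.get_user(user_id="005Dn000001XmpIAE")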
+ """ + + if not self._access_token or not self._instance_url: + self.authenticate() + + request_header = self.request_header() + request_url = self.config()["userUrl"] + user_id + + logger.debug( + "Get Salesforce user with ID -> %s; calling -> %s", user_id, request_url + ) + + retries = 0 + while True: + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + request_header = self.request_header() + retries += 1 + else: + logger.error( + "Failed to get Salesforce user -> %s; status -> %s; error -> %s", + user_id, + response.status_code, + response.text, + ) + return None + + # end method definition + def add_user( self, username: str, email: str, - password: str, firstname: str, lastname: str, + title: str | None = None, + department: str | None = None, + company_name: str = "Innovate", + profile_name: Optional[str] = "Standard User", profile_id: Optional[str] = None, + time_zone_key: Optional[str] = "America/Los_Angeles", + email_encoding_key: Optional[str] = "ISO-8859-1", + locale_key: Optional[str] = "en_US", alias: Optional[str] = None, ) -> dict | None: - """Add a new Salesforce user. + """Add a new Salesforce user. The password has to be set separately. Args: username (str): Login name of the new user email (str): Email of the new user - password (str): Password of the new user firstname (str): First name of the new user. lastname (str): Last name of the new user. + title (str): Title of the user. + department (str): Department of the user. + company_name (str): Name of the Company of the user. + profile_name (str): Profile name like "Standard User" profile_id (str, optional): Profile ID of the new user. Defaults to None. Use method get_all_user_profiles() to determine - the desired Profile for the user. + the desired Profile for the user. Or pass the profile_name. + time_zone_key (str, optional) in format country/city like "America/Los_Angeles", + email_encoding_key (str, optional). Default is "ISO-8859-1". + locale_key (str, optional). Default is "en_US". alias (str, optional): Alias of the new user. Defaults to None. 
Returns: @@ -709,23 +1096,32 @@ def add_user( """ if not self._access_token or not self._instance_url: - logger.error("Authentication required.") - return None + self.authenticate() request_header = self.request_header() - request_url = f"{self._instance_url}/services/data/v52.0/sobjects/User/" + request_url = self.config()["userUrl"] + + # if just a profile name is given then we determine the profile ID by the name: + if profile_name and not profile_id: + profile_id = self.get_user_profile_id(profile_name) payload = { "Username": username, "Email": email, - "Password": password, "FirstName": firstname, "LastName": lastname, "ProfileId": profile_id, - "Alias": alias, + "Department": department, + "CompanyName": company_name, + "Title": title, + "Alias": alias if alias else username, + "TimeZoneSidKey": time_zone_key, # Set default TimeZoneSidKey + "LocaleSidKey": locale_key, # Set default LocaleSidKey + "EmailEncodingKey": email_encoding_key, # Set default EmailEncodingKey + "LanguageLocaleKey": locale_key, # Set default LanguageLocaleKey } - logger.info( + logger.debug( "Adding Salesforce user -> %s; calling -> %s", username, request_url ) @@ -740,7 +1136,7 @@ def add_user( if response.ok: return self.parse_request_response(response) elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -755,6 +1151,193 @@ def add_user( # end method definition + def update_user( + self, + user_id: str, + update_data: dict, + ) -> dict | None: + """Update a Salesforce user. + + Args: + user_id (str): The Salesforce user ID. + update_data (dict): Dictionary containing the fields to update. + + Returns: + dict | None: Response from the Salesforce API or None if the request fails. + """ + + if not self._access_token or not self._instance_url: + self.authenticate() + + request_header = self.request_header() + + request_url = self.config()["userUrl"] + user_id + + logger.debug( + "Update Salesforce user with ID -> %s; calling -> %s", user_id, request_url + ) + + retries = 0 + while True: + response = requests.patch( + request_url, + json=update_data, + headers=request_header, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + request_header = self.request_header() + retries += 1 + else: + logger.error( + "Failed to update Salesforce user -> %s; status -> %s; error -> %s", + user_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def update_user_password( + self, + user_id: str, + password: str, + ) -> dict | None: + """Update the password of a Salesforce user. + + Args: + user_id (str): The Salesforce user ID. + password (str): New user password. + + Returns: + dict | None: Response from the Salesforce API or None if the request fails.
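+ + Example (a usage sketch, not part of the original code; ID and password are placeholders): + + response = salesforce.update_user_password( + user_id="005Dn000001XmpIAE", + password="<new password>", + )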
+ """ + + if not self._access_token or not self._instance_url: + self.authenticate() + + request_header = self.request_header() + + request_url = self.config()["userUrl"] + "{}/password".format(user_id) + + logger.debug( + "Update password of Salesforce user with ID -> %s; calling -> %s", + user_id, + request_url, + ) + + update_data = {"NewPassword": password} + + retries = 0 + while True: + response = requests.post( + request_url, + json=update_data, + headers=request_header, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + request_header = self.request_header() + retries += 1 + else: + logger.error( + "Failed to update password of Salesforce user -> %s; status -> %s; error -> %s", + user_id, + response.status_code, + response.text, + ) + return None + + # end method definition + + def update_user_photo( + self, + user_id: str, + photo_path: str, + ) -> dict | None: + """Update the Salesforce user photo. + + Args: + user_id (str): Salesforce ID of the user + photo_path (str): file system path with the location of the photo + Returns: + dict | None: Dictionary with the Salesforce User data or None if the request fails. + """ + + if not self._access_token or not self._instance_url: + self.authenticate() + + # Check if the photo file exists + if not os.path.isfile(photo_path): + logger.error("Photo file -> %s not found!", photo_path) + return None + + try: + # Read the photo file as binary data + with open(photo_path, "rb") as image_file: + photo_data = image_file.read() + except OSError as exception: + # Handle any errors that occurred while reading the photo file + logger.error( + "Error reading photo file -> %s; error -> %s", photo_path, exception + ) + return None + + request_header = self.request_header(content_type=None) + + data = {"json": json.dumps({"cropX": 0, "cropY": 0, "cropSize": 200})} + request_url = self.config()["connectUrl"] + f"user-profiles/{user_id}/photo" + files = { + "fileUpload": ( + photo_path, + photo_data, + "application/octet-stream", + ) + } + + logger.debug( + "Update profile photo of Salesforce user with ID -> %s; calling -> %s", + user_id, + request_url, + ) + + retries = 0 + while True: + response = requests.post( + request_url, + files=files, + data=data, + headers=request_header, + verify=False, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + return self.parse_request_response(response) + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + request_header = self.request_header() + retries += 1 + else: + logger.error( + "Failed to update profile photo of Salesforce user with ID -> %s; status -> %s; error -> %s", + user_id, + response.status_code, + response.text, + ) + return None + + # end method definition + def add_account( self, account_name: str, @@ -783,11 +1366,10 @@ def add_account( """ if not self._access_token or not self._instance_url: - logger.error("Authentication required.") - return None + self.authenticate() request_header = self.request_header() - request_url = f"{self._instance_url}/services/data/v52.0/sobjects/Account/" + request_url = self.config()["accountUrl"] payload = { "Name": account_name, @@ -800,7 +1382,7 @@ def add_account( } payload.update(kwargs) # Add additional fields from kwargs - logger.info( + logger.debug( "Adding 
Salesforce account -> %s; calling -> %s", account_name, request_url ) @@ -815,7 +1397,7 @@ def add_account( if response.ok: return self.parse_request_response(response) elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -851,11 +1433,10 @@ def add_product( """ if not self._access_token or not self._instance_url: - logger.error("Authentication required.") - return None + self.authenticate() request_header = self.request_header() - request_url = f"{self._instance_url}/services/data/v52.0/sobjects/Product2/" + request_url = self.config()["productUrl"] payload = { "Name": product_name, @@ -865,7 +1446,7 @@ def add_product( } payload.update(kwargs) # Add additional fields from kwargs - logger.info( + logger.debug( "Add Salesforce product -> %s; calling -> %s", product_name, request_url ) @@ -880,7 +1461,7 @@ def add_product( if response.ok: return self.parse_request_response(response) elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -922,11 +1503,10 @@ def add_opportunity( """ if not self._access_token or not self._instance_url: - logger.error("Authentication required.") - return None + self.authenticate() request_header = self.request_header() - request_url = f"{self._instance_url}/services/data/v52.0/sobjects/Opportunity/" + request_url = self.config()["opportunityUrl"] payload = { "Name": name, @@ -939,7 +1519,7 @@ def add_opportunity( payload["Description"] = description payload.update(kwargs) # Add additional fields from kwargs - logger.info( + logger.debug( "Add Salesforce opportunity -> %s; calling -> %s", name, request_url ) @@ -954,7 +1534,7 @@ def add_opportunity( if response.ok: return self.parse_request_response(response) elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -967,6 +1547,8 @@ def add_opportunity( ) return None + # end method definition + def add_case( self, subject: str, @@ -990,6 +1572,7 @@ def add_case( priority (str): Priority of the case. Typical values: "High", "Medium", "Low". origin (str): origin (source) of the case. Typical values: "Email", "Phone", "Web" account_id (str): technical ID of the related Account + owner_id (str): owner of the case asset_id (str): technical ID of the related Asset product_id (str): technical ID of the related Product kwargs (Any): additional values (e.g. 
custom fields) @@ -999,11 +1582,10 @@ def add_case( """ if not self._access_token or not self._instance_url: - logger.error("Authentication required.") - return None + self.authenticate() request_header = self.request_header() - request_url = f"{self._instance_url}/services/data/v52.0/sobjects/Case/" + request_url = self.config()["caseUrl"] payload = { "Subject": subject, @@ -1021,7 +1603,7 @@ def add_case( payload["ProductId"] = product_id payload.update(kwargs) # Add additional fields from kwargs - logger.info("Add Salesforce case -> %s; calling -> %s", subject, request_url) + logger.debug("Add Salesforce case -> %s; calling -> %s", subject, request_url) retries = 0 while True: @@ -1034,7 +1616,7 @@ def add_case( if response.ok: return self.parse_request_response(response) elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1077,11 +1659,10 @@ def add_asset( """ if not self._access_token or not self._instance_url: - logger.error("Authentication required.") - return None + self.authenticate() request_header = self.request_header() - request_url = f"{self._instance_url}/services/data/v52.0/sobjects/Asset/" + request_url = self.config()["assetUrl"] payload = { "Name": asset_name, @@ -1095,7 +1676,7 @@ def add_asset( payload["Description"] = description payload.update(kwargs) # Add additional fields from kwargs - logger.info( + logger.debug( "Add Salesforce asset -> %s; calling -> %s", asset_name, request_url ) @@ -1110,7 +1691,7 @@ def add_asset( if response.ok: return self.parse_request_response(response) elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 @@ -1151,11 +1732,10 @@ def add_contract( """ if not self._access_token or not self._instance_url: - logger.error("Authentication required.") - return None + self.authenticate() request_header = self.request_header() - request_url = f"{self._instance_url}/services/data/v52.0/sobjects/Contract/" + request_url = self.config()["contractUrl"] payload = { "AccountId": account_id, @@ -1169,7 +1749,7 @@ def add_contract( payload["ContractType"] = contract_type payload.update(kwargs) # Add additional fields from kwargs - logger.info( + logger.debug( "Adding Salesforce contract for account ID -> %s; calling -> %s", account_id, request_url, @@ -1186,7 +1766,7 @@ def add_contract( if response.ok: return self.parse_request_response(response) elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) request_header = self.request_header() retries += 1 diff --git a/pyxecm/customizer/sap.py b/pyxecm/customizer/sap.py index 69c39fe..0e23973 100644 --- a/pyxecm/customizer/sap.py +++ b/pyxecm/customizer/sap.py @@ -39,21 +39,21 @@ import logging +logger = logging.getLogger("pyxecm.customizer.sap") + try: import pyrfc _has_pyrfc = True except ModuleNotFoundError as module_exception: - logging.error("pyrfc not installed, SAP integration impatcted") + logger.error("pyrfc not installed, SAP integration impacted") _has_pyrfc = False except ImportError as 
import_exception: - logging.error("pyrfc could not be loaded, SAP integration impatcted") + logger.error("pyrfc could not be loaded, SAP integration impacted") _has_pyrfc = False -logger = logging.getLogger("pyxecm.customizer.sap") - class SAP(object): """Used to implement Remote Function Calls (RFC) to SAP S/4HANA""" diff --git a/pyxecm/customizer/servicenow.py b/pyxecm/customizer/servicenow.py new file mode 100644 index 0000000..d4c141c --- /dev/null +++ b/pyxecm/customizer/servicenow.py @@ -0,0 +1,1221 @@ +""" +ServiceNow Module to interact with the ServiceNow API +See: + +Class: ServiceNow +Methods: + +__init__ : class initializer +config : Returns config data set +credentials: Returns the token data +request_header: Returns the request header for ServiceNow API calls +parse_request_response: Parse the REST API responses and convert + them to Python dict in a safe way +exist_result_item: Check if a dict item is in the response + of the ServiceNow API call +get_result_value: Check if a defined value (based on a key) is in the ServiceNow API response + +authenticate : Authenticates at ServiceNow API +get_oauth_token: Returns the OAuth access token. + +get_data: Get the Data object that holds all processed Knowledge base Articles +get_object: Get a ServiceNow object based on table name and ID +get_summary: Get summary object for an article. +get_knowledge_base_articles: Get selected / filtered Knowledge Base articles +make_file_names_unique: Make file names unique if required. The mutable + list is changed "in-place". +download_attachments: Download the attachments of a Knowledge Base Article (KBA) in ServiceNow. +load_articles: Main method to load ServiceNow articles in a Data Frame and + download the attachments. +load_article: Process a single KBA: download attachments (if any) + and add the KBA to the Data Frame. +load_articles_worker: Worker Method for multi-threading. +""" + +__author__ = "Dr. Marc Diefenbruch" +__copyright__ = "Copyright 2024, OpenText" +__credits__ = ["Kai-Philip Gatzweiler"] +__maintainer__ = "Dr. Marc Diefenbruch" +__email__ = "mdiefenb@opentext.com" + +import os +import json +import logging +import urllib.parse +import threading +import traceback +from functools import cache +import time + +import requests +from requests.auth import HTTPBasicAuth +from requests.exceptions import HTTPError, RequestException +from pyxecm.helper.data import Data + +logger = logging.getLogger("pyxecm.customizer.servicenow") + +REQUEST_HEADERS = {"Accept": "application/json", "Content-Type": "application/json"} + +REQUEST_TIMEOUT = 60 + +KNOWLEDGE_BASE_PATH = "/tmp/attachments" + + +class ServiceNow(object): + """Used to retrieve and automate settings in ServiceNow.""" + + _config: dict + _access_token = None + _session = None + _data: Data = None + _thread_number = 3 + _download_dir = "" + + def __init__( + self, + base_url: str, + auth_type: str, + client_id: str, + client_secret: str, + username: str, + password: str, + token_url: str = "", + thread_number: int = 3, + download_dir: str = KNOWLEDGE_BASE_PATH, + ): + """Initialize the ServiceNow object + + Args: + base_url (str): base URL of the ServiceNow tenant + auth_type (str): authorization type, either "oauth" or "basic" + client_id (str): ServiceNow Client ID + client_secret (str): ServiceNow Client Secret + username (str): user name in ServiceNow + password (str): password of the user + token_url (str, optional): Token URL for ServiceNow login via OAuth.
+ thread_number (int, optional): number of threads for parallel processing. Default is 3. + download_dir (str, optional): path to store files downloaded from ServiceNow + """ + + servicenow_config = {} + + # Store the credentials and parameters in a config dictionary: + servicenow_config["baseUrl"] = base_url + servicenow_config["authType"] = auth_type + servicenow_config["clientId"] = client_id + servicenow_config["clientSecret"] = client_secret + servicenow_config["username"] = username + servicenow_config["password"] = password + if not token_url: + token_url = base_url + "/oauth_token.do" + servicenow_config["tokenUrl"] = token_url + + servicenow_config["restUrl"] = servicenow_config["baseUrl"] + "/api/now/" + servicenow_config["tableUrl"] = servicenow_config["restUrl"] + "table" + servicenow_config["knowledgeUrl"] = ( servicenow_config["restUrl"] + "table/kb_knowledge" ) + servicenow_config["knowledgeBaseUrl"] = ( servicenow_config["restUrl"] + "table/kb_knowledge_base" ) + servicenow_config["attachmentsUrl"] = ( servicenow_config["restUrl"] + "table/sys_attachment" ) + servicenow_config["attachmentDownloadUrl"] = ( servicenow_config["restUrl"] + "attachment" ) + servicenow_config["statsUrl"] = servicenow_config["restUrl"] + "stats" + + self._config = servicenow_config + + self._session = requests.Session() + + self._data = Data() + + self._thread_number = thread_number + + self._download_dir = download_dir + + # end method definition + + def thread_wrapper(self, target, *args, **kwargs): + """Function to wrap around threads to catch exceptions during execution""" + try: + target(*args, **kwargs) + except Exception as e: + thread_name = threading.current_thread().name + logger.error("Thread %s: failed with exception %s", thread_name, e) + logger.error(traceback.format_exc()) + + # end method definition + + def config(self) -> dict: + """Returns the configuration dictionary + + Returns: + dict: Configuration dictionary + """ + return self._config + + # end method definition + + def get_data(self) -> Data: + """Get the Data object that holds all processed Knowledge base Articles + + Returns: + Data: Datastructure with all processed articles. + """ + + return self._data + + # end method definition + + def request_header(self, content_type: str = "") -> dict: + """Returns the request header used for Application calls. + Consists of Bearer access token and Content Type + + Args: + content_type (str, optional): custom content type for the request + Return: + dict: request header values + """ + + # copy the default headers so the module-level dictionary is not mutated: + request_header = dict(REQUEST_HEADERS) + + if self.config()["authType"] == "oauth": + request_header["Authorization"] = "Bearer {}".format(self._access_token) + + if content_type: + request_header["Content-Type"] = content_type + + return request_header + + # end method definition + + def parse_request_response( + self, + response_object: requests.Response, + additional_error_message: str = "", + show_error: bool = True, + ) -> dict | None: + """Converts the request response (JSon) to a Python dict in a safe way + that also handles exceptions. It first tries to load the response.text + via json.loads() that produces a dict output. Only if response.text is + not set or is empty it just converts the response_object to a dict using + the vars() built-in method.
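+ For example (illustrative): a response body of '{"result": []}' is returned as the Python dict {'result': []}.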
+ + Args: + response_object (object): this is the response object delivered by the request call + additional_error_message (str, optional): use a more specific error message + in case of an error + show_error (bool): True: write an error to the log file + False: write a warning to the log file + Returns: + dict: response information or None in case of an error + """ + + if not response_object: + return None + + try: + if response_object.text: + dict_object = json.loads(response_object.text) + else: + dict_object = vars(response_object) + except json.JSONDecodeError as exception: + if additional_error_message: + message = "Cannot decode response as JSON. {}; error -> {}".format( + additional_error_message, exception + ) + else: + message = "Cannot decode response as JSON; error -> {}".format( + exception + ) + if show_error: + logger.error(message) + else: + logger.warning(message) + return None + else: + return dict_object + + # end method definition + + def exist_result_item(self, response: dict, key: str, value: str) -> bool: + """Check existence of key / value pair in the response properties of a ServiceNow API call. + + Args: + response (dict): REST response from a ServiceNow API call + key (str): property name (key) + value (str): value to find in the item with the matching key + Returns: + bool: True if the value was found, False otherwise + """ + + if not response: + return False + + if "result" in response: + records = response["result"] + if not records or not isinstance(records, list): + return False + + for record in records: + if value == record[key]: + return True + else: + if not key in response: + return False + if value == response[key]: + return True + + return False + + # end method definition + + def get_result_value( + self, + response: dict, + key: str, + index: int = 0, + ) -> str | None: + """Get value of a result property with a given key of a ServiceNow API call. + + Args: + response (dict): REST response from a ServiceNow REST call + key (str): property name (key) + index (int, optional): Index to use (1st element has index 0). + Defaults to 0. + Returns: + str: value for the key, None otherwise + """ + + # ServiceNow responses should always have a "result": + if not response or not "result" in response: + return None + + values = response["result"] + if not values: + return None + + # ServiceNow can either have a dict or a list structure + # in "results": + if isinstance(values, list) and index < len(values): + value = values[index][key] + elif isinstance(values, dict) and key in values: + value = values[key] + else: + logger.error("Illegal data type in ServiceNow response!") + return None + + return value + + # end method definition + + def authenticate(self, auth_type: str) -> str | None: + """Authenticate at ServiceNow with client ID and client secret or with basic authentication.""" + + self._session.headers.update(self.request_header()) + + if auth_type == "basic": + username = self.config()["username"] + password = self.config()["password"] + if not self._session: + self._session = requests.Session() + self._session.auth = HTTPBasicAuth(username, password) + return self._session.auth + elif auth_type == "oauth": + token = self.get_oauth_token() + self._session.headers.update({"Authorization": "Bearer {}".format(token)}) + + return token + else: + logger.error("Unsupported authentication type") + return None + + # end method definition + + def get_oauth_token(self) -> str | None: + """Returns the OAuth access token.
+ + Returns: + str | None: Access token or None in case of an error + """ + + token_post_body = { + "grant_type": "client_credentials", + "client_id": self.config()["clientId"], + "client_secret": self.config()["clientSecret"], + } + + response = requests.post( + url=self.config()["tokenUrl"], + data=token_post_body, + timeout=REQUEST_TIMEOUT, + ) + + if response.ok: + authenticate_dict = self.parse_request_response(response) + if not authenticate_dict: + return None + else: + # Store authentication access_token: + self._access_token = authenticate_dict["access_token"] + logger.debug("Access Token -> %s", self._access_token) + else: + logger.error( + "Failed to request a ServiceNow Access Token; error -> %s", + response.text, + ) + return None + + return self._access_token + + # end method definition + + @cache + def get_object(self, table_name: str, sys_id: str) -> dict | None: + """Get a ServiceNow object based on table name and ID + + Args: + table_name (str): Name of the ServiceNow table. + sys_id (str): ID of the data set to resolve. + + Returns: + dict | None: dictionary of fields of resulting table row or None + in case an error occurred. + """ + + if not table_name: + logger.error("Table name is missing!") + return None + + if not sys_id: + logger.error("System ID of item to lookup is missing!") + return None + + request_header = self.request_header() + + request_url = self.config()["restUrl"] + "table/{}/{}".format( + table_name, sys_id + ) + + try: + response = self._session.get(url=request_url, headers=request_header) + data = self.parse_request_response(response) + + return data + except HTTPError as http_err: + logger.error( + "HTTP error occurred while resolving -> %s in table -> '%s': %s", + sys_id, + table_name, + str(http_err), + ) + except RequestException as req_err: + logger.error( + "Request error occurred while resolving -> %s in table -> '%s': %s", + sys_id, + table_name, + str(req_err), + ) + except Exception as err: + logger.error( + "An error occurred while resolving -> %s in table -> '%s': %s", + sys_id, + table_name, + str(err), + ) + + return None + + # end method definition + + def get_summary(self, summary_sys_id: str) -> dict | None: + """Get summary object for an article. + + Args: + summary_sys_id (str): System ID of the article + + Returns: + dict | None: Summary object of the article or None in case of an error. + """ + + return self.get_object(table_name="kb_knowledge_summary", sys_id=summary_sys_id) + + def get_table( + self, + table_name: str, + query: str = "", + fields: list | None = None, + limit: int | None = 10, + offset: int = 0, + error_string: str = "", + ) -> list | None: + """Retrieve rows from a specified ServiceNow table. + + Args: + table_name (str): Name of the ServiceNow table + query (str, optional): Query to filter the results. + fields (list, optional): Just return the fields in this list. + Defaults to None which means to deliver + all fields. + limit (int, optional): Number of results to return. None = unlimited. + offset (int, optional): first item to return (for chunking) + error_string (str, optional): custom error string + + Returns: + list | None: List of result rows or None if the request fails.
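+ + Example (a usage sketch, not part of the original code; assumes authenticate() has been called and uses illustrative table / query values): + + rows = servicenow.get_table( + table_name="incident", + query="active=true", + fields=["number", "short_description"], + limit=5, + )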
+ """ + + request_header = self.request_header() + + params = {} + + if query: + params["sysparm_query"] = query + if fields: + params["sysparm_fields"] = ",".join(fields) + if limit: + params["sysparm_limit"] = limit + if offset: + params["sysparm_offset"] = offset + + encoded_query = urllib.parse.urlencode(params, doseq=True) + + request_url = self.config()["tableUrl"] + "/{}?{}".format( + table_name, encoded_query + ) + + try: + while True: + response = self._session.get( + url=request_url, headers=request_header # , params=params + ) + data = self.parse_request_response(response) + + if response.status_code == 200: + return data.get("result", []) + elif response.status_code == 202: + logger.warning( + "Service Now returned <202 Accepted> -> throtteling, retrying ..." + ) + time.sleep(1000) + else: + return None + + except HTTPError as http_err: + logger.error("%sHTTP error -> %s!", error_string, str(http_err)) + except RequestException as req_err: + logger.error("%sRequest error -> %s!", error_string, str(req_err)) + except Exception as err: + logger.error("%sError -> %s!", error_string, str(err)) + + return None + + def get_knowledge_bases(self) -> list | None: + """Get the configured knowledge bases in Service Now. + + Returns: + list | None: list of configured knowledge bases or None in case of an error. + + Example: + [ + { + 'mandatory_fields': '', + 'template': '', + 'enable_socialqa': 'false', + 'icon': '', 'description': '', + 'question_annotation': '', + 'sys_updated_on': '2022-10-05 18:55:55', + 'title': 'Support articles, alerts & useful tools', + 'disable_suggesting': 'false', + 'related_products': '', + 'sys_id': '58819851db61b41068cfd6c4e29619bf', + 'disable_category_editing': 'true', + 'enable_blocks': 'true', + 'sys_updated_by': 'nmohamme@opentext.com', + 'article_validity': '', + 'disable_commenting': 'true', + 'sys_created_on': '2021-07-23 11:37:50', + 'sys_domain': {...}, + 'kb_version': '3', + 'sys_created_by': 'marquezj', + 'table': 'kb_knowledge', + 'order': '', + 'owner': { + 'link': 'https://support.opentext.com/api/now/table/sys_user/053429e31b5f0114fea2ec20604bcb95', + 'value': '053429e31b5f0114fea2ec20604bcb95' + }, + 'retire_workflow': { + 'link': 'https://support.opentext.com/api/now/table/wf_workflow/6b3e7ce6dbedb81068cfd6c4e2961936', + 'value': '6b3e7ce6dbedb81068cfd6c4e2961936' + }, + 'languages': 'en,fq,de,ja,es,pb', + 'workflow': { + 'link': 'https://support.opentext.com/api/now/table/wf_workflow/184cb8e2dbedb81068cfd6c4e296199c', + 'value': '184cb8e2dbedb81068cfd6c4e296199c' + }, + 'approval_description': '', + 'disable_mark_as_helpful': 'false', + 'sys_mod_count': '76', + 'active': 'true', + 'sys_domain_path': '/', + 'sys_tags': '', + 'application': { + 'link': 'https://support.opentext.com/api/now/table/sys_scope/global', + 'value': 'global' + }, + 'card_color': '', + 'disable_rating': 'false', + 'create_translation_task': 'false', + 'kb_managers': 'acab67001b6b811461a7a8e22a4bcbbe,7ab0b6801ba205d061a7a8e22a4bcbec,2a685f4c1be7811461a7a8e22a4bcbfd,6cc3c3d2db21781068cfd6c4e2961962,053429e31b5f0114fea2ec20604bcb95,5454eb441b6b0514fea2ec20604bcbfc,3a17970c1be7811461a7a8e22a4bcb23' + }, + ... 
+ ] + """ + + return self.get_table( + table_name="kb_knowledge_base", error_string="Cannot get Knowledge Bases; " + ) + + # end method definition + + def get_table_count( + self, + table_name: str, + query: str | None = None, + ) -> int: + """Get number of Knowledge Base Articles matching the query (or if query = "" it should be the total number) + + Args: + table_name (str): name of the ServiceNow table + query (str, optional): Query string to filter the results. Defaults to "". + + Returns: + int: Number of Knowledge Base Articles. + """ + + request_header = self.request_header() + + params = {"sysparm_count": "true"} + + if query: + params["sysparm_query"] = query + + encoded_query = urllib.parse.urlencode(params, doseq=True) + + request_url = self.config()["statsUrl"] + "/{}?{}".format( + table_name, encoded_query + ) + + try: + response = self._session.get( + url=request_url, headers=request_header, timeout=600 + ) + data = self.parse_request_response(response) + return int(data["result"]["stats"]["count"]) + except HTTPError as http_err: + logger.error("HTTP error occurred -> %s!", str(http_err)) + except RequestException as req_err: + logger.error("Request error occurred -> %s!", str(req_err)) + except Exception as err: + logger.error("An error occurred -> %s!", str(err)) + + return None + + # end method definition + + def get_knowledge_base_articles( + self, + query: str = "", + fields: list | None = None, + limit: int | None = 10, + offset: int = 0, + ) -> list | None: + """Get selected / filtered Knowledge Base articles + + Args: + query (str, optional): Query to filter the the articles. + fields (list, optional): Just return the fileds in this list. + Defaults to None which means to deliver + all fields. + limit (int, optional): Number of results to return. None = unlimited. + offset (int, optional): first item to return (for chunking) + + Returns: + list | None: List or articles or None if the request fails. + + Example: + [ + { + 'parent': '', + 'wiki': None, + 'rating': '', + 'language': 'en', + 'source': '', + 'sys_updated_on': '2024-02-28 21:37:47', + 'number': 'KB0530086', + 'u_sub_product_line': 'cc1c280387655d506d9a2f8f8bbb35e0', + 'sys_updated_by': 'scotts@opentext.com', + 'sys_created_on': '2024-02-28 21:37:16', + 'sys_domain': { + 'link': 'https://support.opentext.com/api/now/table/sys_user_group/global', + 'value': 'global' + }, + 'workflow_state': 'published', + 'text': '', + 'sys_created_by': 'scotts@opentext.com', + 'scheduled_publish_date': '', + 'image': '', + 'author': { + 'link': 'https://support.opentext.com/api/now/table/sys_user/ffd35065875499109fdd2f8f8bbb353f', + 'value': 'ffd35065875499109fdd2f8f8bbb353f' + }, + 'u_related_products_text_search': '
  • LearnFlex APP0578
  • ', + 'can_read_user_criteria': 'de3a815b1b0601109b6987b7624bcba6', + 'active': 'true', + 'cannot_read_user_criteria': '', + 'published': '2024-02-28', + 'helpful_count': '0', + 'sys_domain_path': '/', + 'version': { + 'link': 'https://support.opentext.com/api/now/table/kb_version/7cd172cf1b6cca10d7604223cd4bcb99', + 'value': '7cd172cf1b6cca10d7604223cd4bcb99' + }, + 'meta_description': 'In LearnFlex, what types of messages are in message management?', + 'kb_knowledge_base': { + 'link': 'https://support.opentext.com/api/now/table/kb_knowledge_base/58819851db61b41068cfd6c4e29619bf', + 'value': '58819851db61b41068cfd6c4e29619bf' + }, + 'meta': 'LearnFlex, 384, Message_Management, Message', + 'u_platform_choice': '', + 'topic': 'General', + 'display_number': 'KB0530086 v3.0', + 'u_product_line': '1f401ecc1bf6891061a7a8e22a4bcb7d', + 'base_version': { + 'link': 'https://support.opentext.com/api/now/table/kb_knowledge/740fbd4547651910ab0a9ed7536d4350', + 'value': '740fbd4547651910ab0a9ed7536d4350' + }, + 'short_description': 'LearnFlex - What Types of Messages are in Message Management?', + 'u_available_translations': 'English', + 'u_limited_release': 'No', + 'u_internal_review': '', + 'roles': '', + 'direct': 'false', + 'description': '', + 'disable_suggesting': 'false', + 'related_products': '52609e001b3a891061a7a8e22a4bcb96', + 'sys_class_name': 'u_kb_template_technical_article_public', + 'article_id': '740fbd4547651910ab0a9ed7536d4350', + 'sys_id': '91b13e8f1b6cca10d7604223cd4bcbc1', + 'use_count': '0', + 'flagged': 'false', + 'disable_commenting': 'true', + 'valid_to': '', + 'retired': '', + 'u_kc_object_id': '', + 'u_download_url': '', + 'display_attachments': 'false', + 'latest': 'true', + 'summary': { + 'link': 'https://support.opentext.com/api/now/table/kb_knowledge_summary/410fbd4547651910ab0a9ed7536d4356', + 'value': '410fbd4547651910ab0a9ed7536d4356' + }, + 'sys_view_count': '2', + 'revised_by': { + 'link': 'https://support.opentext.com/api/now/table/sys_user/6fea35401ba3811461a7a8e22a4bcb59', + 'value': '6fea35401ba3811461a7a8e22a4bcb59' + }, + 'article_type': 'text', + 'u_internal_class': '', + 'u_kc_parent_id': '', + 'confidence': 'validated', + 'sys_mod_count': '4', + 'sys_tags': '', + 'replacement_article': '', + 'taxonomy_topic': '', + 'u_application': '52609e001b3a891061a7a8e22a4bcb96', + 'view_as_allowed': 'true', + 'ownership_group': { + 'link': 'https://support.opentext.com/api/now/table/sys_user_group/9a1f66a0473d6d10b6a6778bd36d4375', + 'value': '9a1f66a0473d6d10b6a6778bd36d4375' + }, + 'category': '', + 'kb_category': { + 'link': 'https://support.opentext.com/api/now/table/kb_category/d0144f5edb21781068cfd6c4e2961992', + 'value': 'd0144f5edb21781068cfd6c4e2961992' + }, + 'governance': 'experience' + }, + ... + ] + """ + + return self.get_table( + table_name="u_kb_template_technical_article_public", # derived from table kb_knowledge + query=query, + fields=fields, + limit=limit, + offset=offset, + error_string="Cannot get knowledge base articles; ", + ) + + # end method definition + + def make_file_names_unique(self, file_list: list): + """Make file names unique if required. The mutable + list is changed "in-place". + + Args: + file_list (list): list of attachments as dictionaries + with "sys_id" and "file_name" keys. 
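+ + Example (a sketch derived from the logic below; sys_id values are shortened): + + files = [ + {"sys_id": "a1", "file_name": "report.pdf"}, + {"sys_id": "b2", "file_name": "report.pdf"}, + ] + servicenow.make_file_names_unique(files) + # files[1]["file_name"] is now "report (01).pdf"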
+ """ + + # Dictionary to keep track of how many times each file name has been encountered + name_count = {} + + # Iterate through the list of dictionaries + for file_info in file_list: + original_name = file_info["file_name"] + name, ext = os.path.splitext(original_name) + + # Initialize count if this is the first time the name is encountered + if original_name not in name_count: + name_count[original_name] = 0 + + # Generate a unique file name if the original name has been seen before + if name_count[original_name] > 0: + new_name = f"{name} ({name_count[original_name]:02}){ext}" + # Check if this new name already exists in the list to avoid collisions. + # If it does, increment the suffix number until a unique name is found. + while any(f["file_name"] == new_name for f in file_list): + name_count[original_name] += 1 + new_name = f"{name} ({name_count[original_name]:02}){ext}" + file_info["file_name"] = new_name + + # Increment the count for this file name + name_count[original_name] += 1 + + # end method definition + + def download_attachments( + self, + article: dict, + skip_existing: bool = True, + ) -> bool: + """Download the attachments of a Knowledge Base Article (KBA) in ServiceNow. + + Args: + article (dict): dictionary holding the Service Now article data + skip_existing (bool, optional): skip download if file has been downloaded before + + Returns: + bool: True = success, False = failure + """ + + article_sys_id = article["sys_id"] + article_number = article["number"] + + request_header = self.request_header() + request_url = self.config()["attachmentsUrl"] + + params = { + "sysparm_query": "table_sys_id={}".format(article_sys_id), + "sysparm_fields": "sys_id,file_name", + } + + try: + response = self._session.get( + url=request_url, headers=request_header, params=params + ) + data = self.parse_request_response(response) + attachments = data.get("result", []) + if not attachments: + logger.debug( + "Knowledge base article -> %s does not have attachments to download!", + article_number, + ) + article["has_attachments"] = False + return False + else: + logger.info( + "Knowledge base article -> %s has %s attachments to download...", + article_number, + len(attachments), + ) + article["has_attachments"] = True + + # Service Now can have multiple files with the same name - we need to + # resolve this for Extended ECM: + self.make_file_names_unique(attachments) + + base_dir = os.path.join(self._download_dir, article_number) + + # save download dir for later use in bulkDocument processing... + article["download_dir"] = base_dir + + article["download_files"] = [] + article["download_files_ids"] = [] + + if not os.path.exists(base_dir): + os.makedirs(base_dir) + + for attachment in attachments: + file_path = os.path.join(base_dir, attachment["file_name"]) + + # we build a list of filenames and ids. + # the ids we want to use as nicknames later on + article["download_files"].append(attachment["file_name"]) + article["download_files_ids"].append(attachment["sys_id"]) + if os.path.exists(file_path) and skip_existing: + logger.info( + "File -> %s has been downloaded before. 
Skipping download...", + file_path, + ) + continue + attachment_download_url = ( + self.config()["attachmentDownloadUrl"] + + "/" + + attachment["sys_id"] + + "/file" + ) + attachment_response = self._session.get( + attachment_download_url, stream=True + ) + attachment_response.raise_for_status() + + logger.info( + "Downloading attachment file -> '%s' for article -> %s from ServiceNow...", + file_path, + article_number, + ) + with open(file_path, "wb") as file: + for chunk in attachment_response.iter_content(chunk_size=8192): + file.write(chunk) + + return True + except HTTPError as http_err: + logger.error("HTTP error occurred -> %s!", str(http_err)) + except RequestException as req_err: + logger.error("Request error occurred -> %s!", str(req_err)) + except Exception as err: + logger.error("An error occurred -> %s!", str(err)) + + return False + + # end method definition + + def load_articles(self, table_name: str, query: str | None) -> bool: + """Main method to load ServiceNow articles in a Data Frame and + download the attchments. + + Args: + query (str): Filter criteria for the articles. + + Returns: + bool: True = Success, False = Failure + """ + + total_count = self.get_table_count(table_name=table_name, query=query) + logger.info( + "Total number of Knowledge Base Articles (KBA) -> %s", str(total_count) + ) + + number = self._thread_number + + if total_count >= number: + partition_size = total_count // number + remainder = total_count % number + else: + partition_size = total_count + remainder = 0 + number = 1 + + logger.info( + "Processing -> %s Knowledge Base Articles (KBA), thread number -> %s, partition size -> %s", + str(total_count), + number, + partition_size, + ) + + threads = [] + + current_offset = 0 + for i in range(number): + current_partition_size = partition_size + (1 if i < remainder else 0) + thread = threading.Thread( + name=f"load_articles_{i+1:02}", + target=self.thread_wrapper, + args=( + self.load_articles_worker, + query, + current_partition_size, + current_offset, + ), + ) + thread.start() + threads.append(thread) + current_offset += current_partition_size + + for thread in threads: + thread.join() + + return True + + # end method definition + + def load_articles_worker( + self, query: str, partition_size: int, partition_offset: int + ) -> None: + """Worker Method for multi-threading. + + Args: + query (str): Query to select the relevant KBA. + partition_size (int): Total size of the partition assigned to this thread. + partition_offset (int): Starting offset for the KBAs this thread is processing. + """ + + logger.info( + "Processing KBAs in range from -> %s to -> %s...", + partition_offset, + partition_offset + partition_size, + ) + + # We cannot retrieve all KBAs in one go if the partition size is too big (> 100) + # So we define "limit" as the maximum number of KBAs we want to retrieve for one REST call. + # This should be a reasonable number to avoid timeouts. 
We also need to make sure + # the limit is not bigger than the the partition size: + limit = 100 if partition_size > 100 else partition_size + + for offset in range(partition_offset, partition_offset + partition_size, limit): + articles = self.get_knowledge_base_articles( + query=query, limit=limit, offset=offset + ) + logger.info( + "Retrieved a list of %s KBAs starting at offset -> %s to process.", + str(len(articles)), + offset, + ) + for article in articles: + logger.info("Processing KBA -> %s...", article["number"]) + self.load_article(article) + + # end method definition + + def load_article(self, article: dict, skip_existing_downloads: bool = True): + """Process a single KBA: download attachments (if any) + and add the KBA to the Data Frame. + + Args: + article (dict): Dictionary inclusing all fields of + a single KBA. + """ + + _ = self.download_attachments( + article=article, skip_existing=skip_existing_downloads + ) + + # + # Add additional columns from related ServiceNow tables: + # + + if article.get("kb_category"): + category_key = article.get("kb_category")["value"] + category_table_name = "kb_category" + category = self.get_object( + table_name=category_table_name, sys_id=category_key + ) + if category: + article["kb_category_name"] = self.get_result_value( + response=category, key="full_category" + ) + else: + logger.warning( + "Article -> %s has no category value!", article["number"] + ) + article["kb_category_name"] = "" + else: + logger.error("Article -> %s has no value for category!", article["number"]) + article["kb_category_name"] = "" + + knowledge_base_key = article.get("kb_knowledge_base")["value"] + knowledge_base_table_name = "kb_knowledge_base" + knowledge_base = self.get_object( + table_name=knowledge_base_table_name, sys_id=knowledge_base_key + ) + if knowledge_base: + article["kb_knowledge_base_name"] = self.get_result_value( + response=knowledge_base, key="title" + ) + else: + logger.warning( + "Article -> %s has no value for Knowledge Base!", + article["number"], + ) + article["kb_knowledge_base_name"] = "" + + related_product_names = [] + if article.get("related_products"): + related_product_keys = article.get("related_products").split(",") + related_product_table = "cmdb_model" + for related_product_key in related_product_keys: + related_product = self.get_object( + table_name=related_product_table, sys_id=related_product_key + ) + if related_product: + related_product_name = self.get_result_value( + response=related_product, key="name" + ) + logger.debug( + "Found related Product -> '%s' (%s)", + related_product_name, + related_product_key, + ) + related_product_names.append(related_product_name) + # Extended ECM can only handle a maxiumum of 50 line items: + if len(related_product_names) == 49: + logger.info( + "Reached maximum of 50 multi-value items for related Products of article -> %s", + article["number"], + ) + break + else: + logger.warning( + "Article -> %s: Cannot lookup related Product name in table -> '%s' with ID -> %s", + article["number"], + related_product_table, + related_product_key, + ) + else: + logger.warning( + "Article -> %s has no value related Products!", + article["number"], + ) + article["related_product_names"] = related_product_names + + product_line_names = [] + if article.get("u_product_line", None): + product_line_keys = article.get("u_product_line").split(",") + product_line_table = "u_ot_product_model" + for product_line_key in product_line_keys: + product_line = self.get_object( + table_name=product_line_table, 
sys_id=product_line_key ) + if product_line: + product_line_name = self.get_result_value( + response=product_line, key="name" + ) + logger.debug( + "Found related Product Line -> '%s' (%s)", + product_line_name, + product_line_key, + ) + product_line_names.append(product_line_name) + # Extended ECM can only handle a maximum of 50 line items: + if len(product_line_names) == 49: + logger.info( + "Reached maximum of 50 multi-value items for related Product Lines of article -> %s", + article["number"], + ) + break + else: + logger.error( + "Article -> %s: Cannot lookup related Product Line name in table -> '%s' with ID -> %s", + article["number"], + product_line_table, + product_line_key, + ) + else: + logger.warning( + "Article -> %s has no value for related Product Lines!", + article["number"], + ) + article["u_product_line_names"] = product_line_names + + sub_product_line_names = [] + if article.get("u_sub_product_line", None): + sub_product_line_keys = article.get("u_sub_product_line").split(",") + sub_product_line_table = "u_ot_product_model" + for sub_product_line_key in sub_product_line_keys: + sub_product_line = self.get_object( + table_name=sub_product_line_table, sys_id=sub_product_line_key + ) + if sub_product_line: + sub_product_line_name = self.get_result_value( + response=sub_product_line, key="name" + ) + logger.debug( + "Found related Sub Product Line -> '%s' (%s)", + sub_product_line_name, + sub_product_line_key, + ) + sub_product_line_names.append(sub_product_line_name) + # Extended ECM can only handle a maximum of 50 line items: + if len(sub_product_line_names) == 49: + logger.info( + "Reached maximum of 50 multi-value items for related Sub Product Lines of article -> %s", + article["number"], + ) + break + else: + logger.error( + "Article -> %s: Cannot lookup related Sub Product Line name in table -> '%s' with ID -> %s", + article["number"], + sub_product_line_table, + sub_product_line_key, + ) + else: + logger.warning( + "Article -> %s has no value for related Sub Product Lines!", + article["number"], + ) + article["u_sub_product_line_names"] = sub_product_line_names + + application_names = [] + if article.get("u_application", None): + application_keys = article.get("u_application").split(",") + application_table_name = "u_ot_product_model" + for application_key in application_keys: + application = self.get_object( + table_name=application_table_name, sys_id=application_key + ) + if application: + application_name = self.get_result_value( + response=application, key="name" + ) + logger.debug( + "Found related Application -> '%s' (%s)", + application_name, + application_key, + ) + application_names.append(application_name) + # Extended ECM can only handle a maximum of 50 line items: + if len(application_names) == 49: + logger.info( + "Reached maximum of 50 multi-value items for related Applications of article -> %s", + article["number"], + ) + break + else: + logger.warning( + "Article -> %s: Cannot lookup related Application name in table -> '%s' with ID -> %s", + article["number"], + application_table_name, + application_key, + ) + else: + logger.warning( + "Article -> %s has no value for related Applications!", + article["number"], + ) + article["u_application_names"] = application_names + + # Now we add the article to the Pandas Data Frame in the Data class: + with self._data.lock(): + self._data.append(article) + + # end method definition diff --git a/pyxecm/customizer/successfactors.py b/pyxecm/customizer/successfactors.py new file mode 100644 index
diff --git a/pyxecm/customizer/successfactors.py b/pyxecm/customizer/successfactors.py
new file mode 100644
index 0000000..cf48c7e
--- /dev/null
+++ b/pyxecm/customizer/successfactors.py
@@ -0,0 +1,1056 @@
+"""
+SuccessFactors Module to interact with the SuccessFactors API
+
+See:
+https://community.sap.com/t5/enterprise-resource-planning-blogs-by-members/how-to-initiate-an-oauth-connection-to-successfactors-employee-central/ba-p/13332388
+https://help.sap.com/docs/SAP_SUCCESSFACTORS_PLATFORM/d599f15995d348a1b45ba5603e2aba9b/78b1d8aac783455684a7de7a8a5b0c04.html
+
+Class: SuccessFactors
+Methods:
+
+__init__ : class initializer
+config : Returns the config data set
+credentials: Returns the token data
+idp_data: Return the IDP data used to request the SAML assertion
+request_header: Returns the request header for SuccessFactors API calls
+parse_request_response: Parse the REST API responses and convert
+                        them to Python dicts in a safe way
+exist_result_item: Check if a dict item is in the response
+                   of a SuccessFactors API call
+get_result_value: Check if a defined value (based on a key) is in the SuccessFactors API response
+
+get_saml_assertion: Get a SAML assertion for SuccessFactors authentication
+authenticate : Authenticates at the SuccessFactors API
+
+get_country: Get information for a country / countries
+get_user: Get a SuccessFactors user based on its ID.
+get_user_account: Get information for a SuccessFactors user account
+update_user: Update user data, e.g. the user password or email.
+get_employee: Get a list of employee(s) matching given criteria.
+get_entities_metadata: Get the schema (metadata) for a list of entities
+                       (list can be empty to get it for all)
+get_entity_metadata: Get the schema (metadata) for an entity
+"""
+
+__author__ = "Dr. Marc Diefenbruch"
+__copyright__ = "Copyright 2024, OpenText"
+__credits__ = ["Kai-Philip Gatzweiler"]
+__maintainer__ = "Dr. Marc Diefenbruch"
+__email__ = "mdiefenb@opentext.com"
+
+import json
+import logging
+import time
+import urllib.parse
+import requests
+
+import xmltodict
+
+logger = logging.getLogger("pyxecm.customizer.successfactors")
+
+request_login_headers = {
+    "Content-Type": "application/x-www-form-urlencoded",  # "application/json",
+    "Accept": "application/json",
+}
+
+REQUEST_TIMEOUT = 60
+REQUEST_MAX_RETRIES = 5
+REQUEST_RETRY_DELAY = 60
+
+class SuccessFactors(object):
+    """Used to retrieve and automate settings in SuccessFactors."""
+
+    _config: dict
+    _access_token = None
+    _assertion = None
+
+    def __init__(
+        self,
+        base_url: str,
+        as_url: str,
+        client_id: str,
+        client_secret: str,
+        username: str = "",
+        password: str = "",
+        company_id: str = "",
+        authorization_url: str = "",
+    ):
+        """Initialize the SuccessFactors object
+
+        Args:
+            base_url (str): base URL of the SuccessFactors tenant
+            as_url (str): URL of the SuccessFactors OData API service
+            client_id (str): SuccessFactors Client ID
+            client_secret (str): SuccessFactors Client Secret
+            username (str, optional): user name in SuccessFactors; may include
+                                      the company ID after an "@" character
+            password (str, optional): password of the user
+            company_id (str, optional): SuccessFactors company ID; if not given
+                                        it is derived from the username (the
+                                        part after the "@" character)
+            authorization_url (str, optional): URL for SuccessFactors login. If not given
+                                               it will be constructed with default values
+                                               using base_url (ending with "/oauth/token").
+        """
+
+        successfactors_config = {}
+
+        # this class assumes that the base URL is provided without
+        # a trailing "/". Otherwise the trailing slash is removed.
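+        # e.g. a hypothetical "https://hiillos.successfactors.eu/" is
+        # normalized to "https://hiillos.successfactors.eu":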
+ if base_url.endswith("/"): + base_url = base_url[:-1] + + # Set the authentication endpoints and credentials + successfactors_config["baseUrl"] = base_url + successfactors_config["asUrl"] = as_url + successfactors_config["clientId"] = client_id + successfactors_config["clientSecret"] = client_secret + successfactors_config["username"] = username.split("@")[ + 0 + ] # we don't want the company ID in the user name + successfactors_config["password"] = password + if company_id: + successfactors_config["companyId"] = company_id + elif "@" in username: + # if the company ID is not provided as a parameter + # we check if it is included in the username: + company_id = username.split("@")[1] + successfactors_config["companyId"] = company_id + if authorization_url: + successfactors_config["authenticationUrl"] = authorization_url + else: + successfactors_config["authenticationUrl"] = ( + successfactors_config["baseUrl"] + "/oauth/token" + ) + + successfactors_config["idpUrl"] = ( + successfactors_config["baseUrl"] + "/oauth/idp" + ) + + if not username: + # Set the data for the token request + successfactors_config["authenticationData"] = { + "grant_type": "client_credentials", + "client_id": client_id, + "client_secret": client_secret, + # "username": successfactors_config["username"], + # "password": password, + } + else: + # Set the data for the token request + successfactors_config["authenticationData"] = { + # "grant_type": "password", + "grant_type": "urn:ietf:params:oauth:grant-type:saml2-bearer", + "company_id": successfactors_config["companyId"], + "username": successfactors_config["username"], + "password": password, + "client_id": client_id, + "client_secret": client_secret, + } + + successfactors_config["idpData"] = { + "client_id": client_id, + "user_id": successfactors_config["username"], + # "use_email": True, + "token_url": successfactors_config["authenticationUrl"], + "private_key": client_secret, + } + + self._config = successfactors_config + + # end method definition + + def config(self) -> dict: + """Returns the configuration dictionary + + Returns: + dict: Configuration dictionary + """ + return self._config + + # end method definition + + def credentials(self) -> dict: + """Return the login credentials + + Returns: + dict: dictionary with login credentials for SuccessFactors + """ + return self.config()["authenticationData"] + + # end method definition + + def idp_data(self) -> dict: + """Return the IDP data used to request the SAML assertion + + Returns: + dict: dictionary with IDP data for SuccessFactors + """ + return self.config()["idpData"] + + # end method definition + + def request_header(self, content_type: str = "application/json") -> dict: + """Returns the request header used for Application calls. + Consists of Bearer access token and Content Type + + Args: + content_type (str, optional): content type for the request + Return: + dict: request header values + """ + + request_header = { + "Authorization": "Bearer {}".format(self._access_token), + "Content-Type": content_type, + "Accept": content_type, + } + return request_header + + # end method definition + + def parse_request_response( + self, + response_object: requests.Response, + additional_error_message: str = "", + show_error: bool = True, + ) -> dict | None: + """Converts the request response (JSon) to a Python dict in a safe way + that also handles exceptions. It first tries to load the response.text + via json.loads() that produces a dict output. 
Only if response.text is
+        not set or is empty does it fall back to converting the response_object
+        to a dict using the vars() built-in method.
+
+        Args:
+            response_object (object): this is the response object delivered by the request call
+            additional_error_message (str, optional): use a more specific error message
+                                                      in case of an error
+            show_error (bool): True: write an error to the log file
+                               False: write a warning to the log file
+        Returns:
+            dict: response information or None in case of an error
+        """
+
+        if not response_object:
+            return None
+
+        try:
+            if response_object.text:
+                dict_object = json.loads(response_object.text)
+            else:
+                dict_object = vars(response_object)
+        except json.JSONDecodeError as exception:
+            if additional_error_message:
+                message = "Cannot decode response as JSON. {}; error -> {}".format(
+                    additional_error_message, exception
+                )
+            else:
+                message = "Cannot decode response as JSON; error -> {}".format(
+                    exception
+                )
+            if show_error:
+                logger.error(message)
+            else:
+                logger.warning(message)
+            return None
+        else:
+            return dict_object
+
+    # end method definition
+
+    def exist_result_item(self, response: dict, key: str, value: str) -> bool:
+        """Check the existence of a key / value pair in the response properties of a SuccessFactors API call.
+
+        Args:
+            response (dict): REST response from a SuccessFactors API call
+            key (str): property name (key)
+            value (str): value to find in the item with the matching key
+        Returns:
+            bool: True if the value was found, False otherwise
+        """
+
+        if not response:
+            return False
+
+        if "d" in response:
+            data = response["d"]
+            if key not in data:
+                return False
+            if value == data[key]:
+                return True
+        else:
+            if key not in response:
+                return False
+            if value == response[key]:
+                return True
+
+        return False
+
+    # end method definition
+
+    def get_result_value(
+        self,
+        response: dict,
+        key: str,
+        index: int = 0,
+    ) -> str | None:
+        """Get the value of a result property with a given key of a SuccessFactors API call.
+
+        Args:
+            response (dict): REST response from a SuccessFactors REST call
+            key (str): property name (key)
+            index (int, optional): Index to use (1st element has index 0).
+                                   Defaults to 0.
+        Returns:
+            str: value for the key, None otherwise
+        """
+
+        if not response or "d" not in response:
+            return None
+
+        data = response["d"]
+
+        # list response types are wrapped in a "results" element
+        # which is of type list
+        if "results" in data:
+            results = data["results"]
+            if not results or not isinstance(results, list):
+                return None
+            try:
+                value = results[index][key]
+            except IndexError as e:
+                logger.error(
+                    "Index error with index -> %s and key -> %s: %s",
+                    str(index),
+                    key,
+                    str(e),
+                )
+                return None
+            except KeyError as e:
+                logger.error(
+                    "Key error with index -> %s and key -> %s: %s",
+                    str(index),
+                    key,
+                    str(e),
+                )
+                return None
+        else:  # simple response - try to find key in response directly:
+            if key not in data:
+                return None
+            value = data[key]
+
+        return value
+
+    # end method definition
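+
+    # Usage sketch (hypothetical response payload): for a list response like
+    #   {"d": {"results": [{"userId": "106020"}, {"userId": "82094"}]}}
+    # get_result_value(response, "userId") returns "106020" and
+    # get_result_value(response, "userId", index=1) returns "82094".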
+
+    def get_saml_assertion(self) -> str | None:
+        """Get a SAML assertion for SuccessFactors authentication.
+
+        Args:
+            None
+        Returns:
+            str: Assertion. Also stores the assertion in self._assertion.
+                 None in case of an error.
+        """
+
+        request_url = self.config()["idpUrl"]
+
+        # request_header = request_login_headers
+
+        logger.debug("Requesting SuccessFactors SAML assertion from -> %s", request_url)
+
+        idp_post_body = self.config()["idpData"]
+
+        response = None
+        self._assertion = None
+
+        try:
+            response = requests.post(
+                request_url,
+                data=idp_post_body,
+                # headers=request_header,
+                timeout=REQUEST_TIMEOUT,
+            )
+        except requests.exceptions.ConnectionError as exception:
+            logger.error(
+                "Unable to get SAML assertion from -> %s : %s",
+                self.config()["idpUrl"],
+                exception,
+            )
+            return None
+
+        if response.ok:
+            assertion = response.text
+            self._assertion = assertion
+            logger.debug("Assertion -> %s", self._assertion)
+            return assertion
+
+        logger.error(
+            "Failed to request a SuccessFactors SAML assertion; error -> %s",
+            response.text,
+        )
+        return None
+
+    # end method definition
+
+    def authenticate(self, revalidate: bool = False) -> str | None:
+        """Authenticate at SuccessFactors with client ID and client secret.
+
+        Args:
+            revalidate (bool, optional): determines whether a re-authentication is enforced
+                                         (e.g. if the session has timed out with a 401 error)
+        Returns:
+            str: Access token. Also stores the access token in self._access_token.
+                 None in case of an error.
+        """
+
+        if not self._assertion:
+            self._assertion = self.get_saml_assertion()
+
+        # Already authenticated and session still valid?
+        if self._access_token and not revalidate:
+            logger.debug(
+                "Session still valid - return existing access token -> %s",
+                str(self._access_token),
+            )
+            return self._access_token
+
+        request_url = self.config()["authenticationUrl"]
+
+        # request_header = request_login_headers
+
+        logger.debug("Requesting SuccessFactors access token from -> %s", request_url)
+
+        authenticate_post_body = self.credentials()
+        authenticate_post_body["assertion"] = self._assertion
+
+        response = None
+        self._access_token = None
+
+        try:
+            response = requests.post(
+                request_url,
+                data=authenticate_post_body,
+                # headers=request_header,
+                timeout=REQUEST_TIMEOUT,
+            )
+        except requests.exceptions.ConnectionError as exception:
+            logger.warning(
+                "Unable to connect to -> %s : %s",
+                self.config()["authenticationUrl"],
+                exception,
+            )
+            return None
+
+        if response.ok:
+            authenticate_dict = self.parse_request_response(response)
+            if not authenticate_dict or "access_token" not in authenticate_dict:
+                return None
+            # Store authentication access_token:
+            self._access_token = authenticate_dict["access_token"]
+            logger.debug("Access Token -> %s", self._access_token)
+        else:
+            logger.error(
+                "Failed to request a SuccessFactors access token; error -> %s",
+                response.text,
+            )
+            return None
+
+        return self._access_token
+
+    # end method definition
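+
+    # Usage sketch (hypothetical tenant URLs and credentials; the OData
+    # service URL passed as as_url is an assumption for illustration only):
+    #
+    #   sf = SuccessFactors(
+    #       base_url="https://hiillos.successfactors.eu",
+    #       as_url="https://apisalesdemo2.successfactors.eu/odata/v2/",
+    #       client_id="...",
+    #       client_secret="...",
+    #       username="sfadmin@companyid",
+    #       password="...",
+    #   )
+    #   token = sf.authenticate()  # gets a SAML assertion, then the token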
+
+    def get_country(self, code: str = "") -> dict | None:
+        """Get information for a country / countries.
+
+        Args:
+            code (str, optional): 3-character code for country selection, like "USA".
+                                  If empty, all countries are returned.
+
+        Returns:
+            dict | None: Country details or None in case of an error.
+        """
+
+        if not self._access_token:
+            self.authenticate()
+
+        if code:
+            request_url = self.config()["asUrl"] + "Country(code='{}')".format(
+                code
+            )  # ,effectiveStartDate=datetime'1900-01-01T00:00:00'
+        else:
+            request_url = self.config()["asUrl"] + "Country"
+
+        request_header = self.request_header()
+
+        response = requests.get(
+            request_url, headers=request_header, timeout=REQUEST_TIMEOUT
+        )
+        if response.status_code == 200:
+            return self.parse_request_response(response)
+        else:
+            logger.error(
+                "Failed to retrieve country data; status -> %s; error -> %s",
+                response.status_code,
+                response.text,
+            )
+            return None
+
+    # end method definition
+
+    def get_user(
+        self,
+        user_id: str = "",  # this is NOT the username but really an ID like 106020
+        field_name: str = "",
+        field_value: str = "",
+        field_operation: str = "eq",
+        max_results: int = 1,
+    ) -> dict | None:
+        """Get information for a SuccessFactors user.
+           Inactive users are not returned by default. To query inactive users,
+           you can explicitly include the status in a $filter or use a key predicate.
+           If you want to query all users, use query option $filter=status in 't','f','T','F','e','d'.
+
+        Args:
+            user_id (str, optional): ID of the user (e.g. "106020"). This is NOT
+                                     the username (like "twalker").
+            field_name (str, optional): field to filter with, e.g. "username"
+            field_value (str, optional): value the filter field should match
+            field_operation (str, optional): OData filter operation. Defaults to "eq".
+            max_results (int, optional): maximum number of results ($top). Defaults to 1.
+
+        Returns:
+            dict | None: User details or None in case of an error.
+
+        Example return data in "d" dictionary:
+
+            {
+                '__metadata': {
+                    'uri': "https://apisalesdemo2.successfactors.eu/odata/v2/User('106020')",
+                    'type': 'SFOData.User'
+                },
+                'userId': '106020',
+                'salaryBudgetFinalSalaryPercentage': None,
+                'dateOfCurrentPosition': '/Date(1388534400000)/',
+                'matrix1Label': None,
+                'salary': '79860.0',
+                'objective': '0.0',
+                'ssn': None,
+                'state': 'New South Wales',
+                'issueComments': None,
+                'timeZone': 'Australia/Sydney',
+                'defaultLocale': 'en_US',
+                'nationality': None,
+                'salaryBudgetLumpsumPercentage': None,
+                'sysCostOfSource': None,
+                'ethnicity': None,
+                'displayName': 'Mark Burke',
+                'payGrade': 'GR-06',
+                'nickname': None,
+                'email': 'Mark.Burke@bestrunsap.com',
+                'salaryBudgetExtra2Percentage': None,
+                'stockBudgetOther1Amount': None,
+                'raiseProrating': None,
+                'sysStartingSalary': None,
+                'finalJobCode': None,
+                'lumpsum2Target': None,
+                'stockBudgetOptionAmount': None,
+                'country': 'Australia',
+                'lastModifiedDateTime': '/Date(1689005658000+0000)/',
+                'stockBudgetStockAmount': None,
+                'sciLastModified': None,
+                'criticalTalentComments': None,
+                'homePhone': None,
+                'veteranSeparated': False,
+                'stockBudgetOther2Amount': None,
+                'firstName': 'Mark',
+                'stockBudgetUnitAmount': None,
+                'salutation': '10808',
+                'impactOfLoss': None,
+                'benchStrength': None,
+                'sysSource': None,
+                'futureLeader': None,
+                'title': 'HR Business Partner',
+                'meritEffectiveDate': None,
+                'veteranProtected': False,
+                'lumpsumTarget': None,
+                'employeeClass': 'Active',
+                'hireDate': '/Date(1388534400000)/',
+                'matrix2Label': None,
+                'salaryLocal': None,
+                'citizenship': None,
+                'reasonForLeaving': None,
+                'riskOfLoss': None,
+                'location': 'Sydney (8510-0001)',
+                'reloComments': None,
+                'username': 'mburke',
+                'serviceDate': None,
+                'reviewFreq': None,
+                'salaryBudgetTotalRaisePercentage': None,
+                ...
+ } + """ + + if not self._access_token: + self.authenticate() + + request_url = self.config()["asUrl"] + "User" + if user_id: + # querying a user by key predicate: + request_url += "('{}')".format(user_id) + + # Add query parameters (these are NOT passed via JSon body!) + query = {} + if field_name and field_value: + query["$filter"] = "{} {} {}".format( + field_name, field_operation, field_value + ) + if max_results > 0: + query["$top"] = max_results + encoded_query = urllib.parse.urlencode(query, doseq=True) + if query: + request_url += "?" + encoded_query + + request_header = self.request_header() + + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + if response.status_code == 200: + return self.parse_request_response(response) + else: + logger.error( + "Failed to retrieve user data; status -> %s; error -> %s", + response.status_code, + response.text, + ) + return None + + # end method definition + + def get_user_account(self, username: str) -> dict | None: + """Get information for a SuccessFactors User Account + Inactive users are not returned by default. To query inactive users, + you can explicitly include the status in a $filter or use a key predicate. + If you want to query all users, use query option $filter=status in 't','f','T','F','e','d'. + + Args: + username (str): login name of the user (e.g. "twalker") + + Returns: + dict | None: User Account details + + Example return data in "d" dictionary: + + { + '__metadata': { + 'uri': "https://apisalesdemo2.successfactors.eu/odata/v2/UserAccount('twalker')", + 'type': 'SFOData.UserAccount' + }, + 'username': 'twalker', + 'lastModifiedDateTime': '/Date(1692701804000+0000)/', + 'accountUuid': '5c7390e0-d9d2-e348-1700-2b02b3a61aa5', + 'createdDateTime': '/Date(1420745485000+0000)/', + 'timeZone': 'US/Eastern', + 'lastInactivationDateTime': None, + 'accountIsLocked': 'FALSE', + 'accountStatus': 'ACTIVE', + 'defaultLocale': 'en_US', + 'lastLoginFailedDateTime': None, + 'accountId': '90', + 'sapGlobalUserId': None, + 'personIdExternal': '82094', + 'userType': 'employee', + 'email': 'twalker@m365x41497014.onmicrosoft.com', + 'user': {'__deferred': {...}} + } + """ + + if not self._access_token: + self.authenticate() + + request_url = self.config()["asUrl"] + "UserAccount('{}')".format(username) + + request_header = self.request_header() + + retries = 0 + + while True: + try: + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + response.raise_for_status() # This will raise an HTTPError for bad responses + return self.parse_request_response(response) + except requests.exceptions.HTTPError as http_err: + logger.error( + "Failed to retrieve user data from SuccessFactors; status -> %s; error -> %s", + response.status_code, + str(http_err), + ) + except requests.exceptions.Timeout: + logger.warning( + "Failed to retrieve user data from SuccessFactors. 
The request timed out.",
+                )
+            except requests.exceptions.ConnectionError as conn_err:
+                # the request never produced a response here, so we only log
+                # the connection error itself:
+                logger.error(
+                    "Cannot connect to SuccessFactors to retrieve user data; error -> %s",
+                    str(conn_err),
+                )
+            except requests.exceptions.RequestException as req_err:
+                logger.error(
+                    "Failed to retrieve user data from SuccessFactors; error -> %s",
+                    str(req_err),
+                )
+            retries += 1
+            if retries <= REQUEST_MAX_RETRIES:
+                # back off longer with each retry:
+                logger.info(
+                    "Retrying in %s seconds...", str(retries * REQUEST_RETRY_DELAY)
+                )
+                time.sleep(retries * REQUEST_RETRY_DELAY)
+            else:
+                break
+
+        return None
+
+    # end method definition
+
+    def update_user(
+        self,
+        user_id: str,  # this is NOT the username but really an ID like 106020
+        update_data: dict,
+    ) -> dict | None:
+        """Update user data, e.g. the user password or email.
+           See: https://help.sap.com/docs/SAP_SUCCESSFACTORS_PLATFORM/d599f15995d348a1b45ba5603e2aba9b/47c39724e7654b99a6be2f71fce1c50b.html?locale=en-US
+
+        Args:
+            user_id (str): ID of the user (e.g. 106020)
+            update_data (dict): Update data
+        Returns:
+            dict | None: Request response or None if an error occurred.
+        """
+
+        if not self._access_token:
+            self.authenticate()
+
+        request_url = self.config()["asUrl"] + "User('{}')".format(user_id)
+
+        request_header = self.request_header()
+        # We need to use a special MERGE header to tell
+        # SuccessFactors to only change the new / provided fields:
+        request_header["X-HTTP-METHOD"] = "MERGE"
+
+        response = requests.post(
+            request_url,
+            headers=request_header,
+            json=update_data,
+            timeout=REQUEST_TIMEOUT,
+        )
+        if response.ok:
+            logger.debug("User with ID -> %s updated successfully.", user_id)
+            return self.parse_request_response(response)
+        else:
+            logger.error(
+                "Failed to update user with ID -> %s; status -> %s; error -> %s",
+                user_id,
+                response.status_code,
+                response.text,
+            )
+            return None
+
+    # end method definition
+
+    def get_employee(
+        self,
+        entity: str = "PerPerson",
+        field_name: str = "",
+        field_value: str = "",
+        field_operation: str = "eq",
+        max_results: int = 1,
+    ) -> dict | None:
+        """Get a list of employee(s) matching given criteria.
+
+        Args:
+            entity (str, optional): Entity type to query. Examples are "PerPerson" (default),
+                                    "PerPersonal", "PerEmail", "PersonKey", ...
+            field_name (str): Field to search in. E.g. personIdExternal, firstName, lastName,
+                              fullName, email, dateOfBirth, gender, nationality, maritalStatus,
+                              employeeId
+            field_value (str): Value to match in the field
+            field_operation (str, optional): OData filter operation. Defaults to "eq".
+            max_results (int, optional): maximum number of results ($top). Defaults to 1.
+
+        Returns:
+            dict | None: Dictionary with the SuccessFactors object data or None in case the request failed.
+
+        Example result values for "PerPerson" inside the "d" structure:
+
+            "results": [
+                {
+                    '__metadata': {...},
+                    'personIdExternal': '109031',
+                    'lastModifiedDateTime': '/Date(1442346839000+0000)/',
+                    'lastModifiedBy': 'admindlr',
+                    'createdDateTime': '/Date(1442346265000+0000)/',
+                    'dateOfBirth': '/Date(-501206400000)/',
+                    'perPersonUuid': '0378B0E6F41444EBB90345B56D537D3D',
+                    'createdOn': '/Date(1442353465000)/',
+                    'lastModifiedOn': '/Date(1442354039000)/',
+                    'countryOfBirth': 'RUS',
+                    'createdBy': 'admindlr',
+                    'regionOfBirth': None,
+                    'personId': '771',
+                    'personalInfoNav': {...},
+                    'emergencyContactNav': {...},
+                    'secondaryAssignmentsNav': {...},
+                    'personEmpTerminationInfoNav': {...},
+                    'phoneNav': {...},
+                    'employmentNav': {...},
+                    ...
+ } + ] + + Example result values for "PerPersonal" inside the "d" structure: + + "results": [ + { + '__metadata': { + 'uri': "https://apisalesdemo2.successfactors.eu/odata/v2/PerPersonal(personIdExternal='108729',startDate=datetime'2017-03-13T00:00:00')", + 'type': 'SFOData.PerPersonal' + }, + 'personIdExternal': '108729', + 'startDate': '/Date(1489363200000)/', + 'lastModifiedDateTime': '/Date(1489442337000+0000)/', + 'endDate': '/Date(253402214400000)/', + 'createdDateTime': '/Date(1489442337000+0000)/', + 'suffix': None, + 'attachmentId': None, + 'preferredName': 'Hillary', + 'lastNameAlt1': None, + 'firstName': 'Hillary', + 'nationality': 'USA', + 'salutation': '30085', + 'maritalStatus': '10825', + 'lastName': 'Lawson', + 'gender': 'F', + 'firstNameAlt1': None, + 'createdOn': '/Date(1489445937000)/', + 'middleNameAlt1': None, + 'lastModifiedBy': '82094', + 'lastModifiedOn': '/Date(1489445937000)/', + 'createdBy': '82094', + 'middleName': None, + 'nativePreferredLang': '10249', + 'localNavAUS': {'__deferred': {...}}, + 'localNavBGD': {'__deferred': {...}}, + 'localNavHKG': {'__deferred': {...}}, + 'localNavMYS': {'__deferred': {...}}, + 'localNavAUT': {'__deferred': {...}}, + 'localNavLKA': {'__deferred': {...}}, + 'localNavPOL': {'__deferred': {...}}, + 'localNavCZE': {'__deferred': {...}}, + 'localNavTWN': {'__deferred': {...}}, + 'localNavARE': {'__deferred': {...}}, + 'localNavARG': {'__deferred': {...}}, + 'localNavCAN': {'__deferred': {...}}, + 'localNavNOR': {'__deferred': {...}}, + 'localNavOMN': {'__deferred': {...}}, + 'localNavPER': {'__deferred': {...}}, + 'localNavSGP': {'__deferred': {...}}, + 'localNavVEN': {'__deferred': {...}}, + 'localNavZAF': {'__deferred': {...}}, + 'localNavCHL': {'__deferred': {...}}, + 'localNavCHE': {'__deferred': {...}}, + 'localNavDNK': {'__deferred': {...}}, + 'localNavGTM': {'__deferred': {...}}, + 'localNavNZL': {'__deferred': {...}}, + 'salutationNav': {'__deferred': {...}}, + 'localNavCHN': {'__deferred': {...}}, + 'localNavVNM': {'__deferred': {...}}, + 'localNavIDN': {'__deferred': {...}}, + 'localNavPRT': {'__deferred': {...}}, + 'localNavCOL': {'__deferred': {...}}, + 'localNavHUN': {'__deferred': {...}}, + 'localNavSWE': {'__deferred': {...}}, + 'localNavESP': {'__deferred': {...}}, + 'localNavUSA': {'__deferred': {...}}, + 'nativePreferredLangNav': {'__deferred': {...}}, + 'maritalStatusNav': {'__deferred': {...}}, ...} + """ + + if not self._access_token: + self.authenticate() + + # Add query parameters (these are NOT passed via JSon body!) + query = {} + if field_name and field_value: + query["$filter"] = "{} {} {}".format( + field_name, field_operation, field_value + ) + if max_results > 0: + query["$top"] = max_results + encoded_query = urllib.parse.urlencode(query, doseq=True) + + request_url = self.config()["asUrl"] + entity + if query: + request_url += "?" 
+ encoded_query + + request_header = self.request_header() + + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + if response.status_code == 200: + return self.parse_request_response(response) + else: + logger.error( + "Failed to retrieve employee data; status -> %s; error -> %s", + response.status_code, + response.text, + ) + return None + + # end method definition + + def get_entities_metadata(self, entities: list | None = None) -> dict | None: + """Get the schema (metadata) for a list of entities (list can be empty to get it for all) + IMPORTANT: A metadata request using $metadata returns an XML serialization of the service, + including the entity data model (EDM) and the service operation descriptions. + The metadata response supports only application/xml type. + + Args: + entities (list): list of entities to deliver metadata for + + Returns: + dict | None: Dictionary with the SuccessFactors object data or None in case the request failed. + """ + + if not self._access_token: + self.authenticate() + + request_url = self.config()["asUrl"] + if entities: + request_url += "{}/".format(",".join(entities)) + request_url += "$metadata" + + request_header = self.request_header() + request_header["Accept"] = "application/xml" + + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + if response.status_code == 200: + return xmltodict.parse(response.text) + else: + logger.error( + "Failed to retrieve entity data; status -> %s; error -> %s", + response.status_code, + response.text, + ) + return None + + # end method definition + + def get_entity_metadata(self, entity: str) -> dict | None: + """Get the schema (metadata) for an entity + + Args: + entity (str): entity to deliver metadata for + + Returns: + dict | None: Dictionary with the SuccessFactors object data or None in case the request failed. + """ + + if not self._access_token: + self.authenticate() + + if not entity: + return None + + request_url = self.config()["asUrl"] + "Entity('{}')?$format=JSON".format( + entity + ) + + request_header = self.request_header() + + response = requests.get( + request_url, headers=request_header, timeout=REQUEST_TIMEOUT + ) + if response.status_code == 200: + return self.parse_request_response(response) + else: + logger.error( + "Failed to retrieve entity data; status -> %s; error -> %s", + response.status_code, + response.text, + ) + return None + + # end method definition + + def update_user_email( + self, + user_id: str, # this is NOT the username but really an ID like 106020 + email_address: str, + email_type: int = 8448, # 8448 + ) -> dict: + """Update user email. + See: https://help.sap.com/docs/SAP_SUCCESSFACTORS_PLATFORM/d599f15995d348a1b45ba5603e2aba9b/7b3daeb3d77d491bb401345eede34bb5.html?locale=en-US + + Args: + user_id (str): ID of the user (e.g. 106020) + email_address (str): new email address of user + email_type (int): Type of the email. 8448 = Business + Returns: + dict: Request response or None if an error occured. 
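+
+        Example (hypothetical ID and address):
+            update_user_email(user_id="106020",
+                              email_address="mark.burke@example.com")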
+ """ + + if not self._access_token: + self.authenticate() + + request_url = self.config()["asUrl"] + "upsert" + + update_data = { + "__metadata": { + "uri": "PerEmail(emailType='{}',personIdExternal='{}')".format( + email_type, user_id + ), + "type": "SFOData.PerEmail", + }, + "emailAddress": email_address, + } + + request_header = self.request_header() + + response = requests.post( + request_url, + headers=request_header, + json=update_data, + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + logger.debug( + "Email of user with ID -> %s successfully updated to -> %s.", + user_id, + email_address, + ) + return self.parse_request_response(response) + else: + logger.error( + "Failed to set email of user with ID -> %s; status -> %s; error -> %s", + user_id, + response.status_code, + response.text, + ) + return None + + # end method definition diff --git a/pyxecm/helper/__init__.py b/pyxecm/helper/__init__.py index 117047b..cbb8d5a 100644 --- a/pyxecm/helper/__init__.py +++ b/pyxecm/helper/__init__.py @@ -1,4 +1,6 @@ """pyxecm helper classes, not for direct use""" + from .assoc import Assoc from .web import HTTP from .xml import XML +from .data import Data diff --git a/pyxecm/helper/assoc.py b/pyxecm/helper/assoc.py index ad907a3..5d224bc 100644 --- a/pyxecm/helper/assoc.py +++ b/pyxecm/helper/assoc.py @@ -165,7 +165,19 @@ def dict_to_string(cls, assoc_dict: dict) -> str: @classmethod def extract_substring( cls, input_string: str, start_sequence: str, stop_sequence: str - ): + ) -> str | None: + """A generic method to extract a substring that is delimited + by a strart and stop sequence. + + Args: + input_string (str): Input string to search the delimited substring in. + start_sequence (str): Start esequence of characters. + stop_sequence (str): Stopß sequence of characters + + Returns: + str | None: the deliminated substring or None if not found. + """ + start_index = input_string.find(start_sequence) if start_index == -1: return None @@ -179,6 +191,17 @@ def extract_substring( @classmethod def extract_assoc_string(cls, input_string: str, is_escaped: bool = False) -> str: + """Extract an Assoc from a string. The assoc is deliminated by A< ... >. + + Args: + input_string (str): Input string that includes the Assoc as a substring. + is_escaped (bool, optional): Whether or not the input string includes the + assoc escaped or not. + + Returns: + str: the assoc string + """ + if is_escaped: assoc_string = cls.extract_substring(input_string, "A<", ">") else: diff --git a/pyxecm/helper/data.py b/pyxecm/helper/data.py new file mode 100644 index 0000000..fa8459f --- /dev/null +++ b/pyxecm/helper/data.py @@ -0,0 +1,1527 @@ +""" +Data Module to implement functions to leverage Pandas to +manipulte data structures read for bulk generation of Extended ECM items. + +This code implements a class called data which is referring +to Pandas DataFrame. + +Class: Payload +Methods: + +__init__ : class initializer +__len__: Lenght of the embedded DataFrame object. +__str__: Print the DataFrame of the class +get_data_frame: Get the Pandas DataFrame object +set_data_frame: Set the Pandas DataFrame object +append: Append additional data to the data frame. 
+"""
+
+__author__ = "Dr. Marc Diefenbruch"
+__copyright__ = "Copyright 2024, OpenText"
+__credits__ = ["Kai-Philip Gatzweiler"]
+__maintainer__ = "Dr. Marc Diefenbruch"
+__email__ = "mdiefenb@opentext.com"
+
+import logging
+import json
+import os
+import re
+import threading
+
+import pandas as pd
+
+logger = logging.getLogger("pyxecm.helper.data")
+
+
+class Data:
+    """Used to automate data loading for the customizer."""
+
+    _df: pd.DataFrame
+    _lock = threading.Lock()
+
+    def __init__(self, init_data: pd.DataFrame | list = None):
+        """Initialize the Data object.
+
+        Args:
+            init_data (pd.DataFrame | list, optional): Data to initialize the data frame. Can either be
+                                                       another data frame (that gets copied) or a list of dictionaries.
+                                                       Defaults to None.
+        """
+
+        if init_data is not None:
+            # if a data frame is passed to the constructor we
+            # copy its content to the new Data object
+
+            if isinstance(init_data, pd.DataFrame):
+                self._df: pd.DataFrame = init_data.copy()
+            elif isinstance(init_data, Data):
+                if init_data.get_data_frame() is not None:
+                    self._df: pd.DataFrame = init_data.get_data_frame().copy()
+                else:
+                    self._df = None
+            elif isinstance(init_data, list):
+                self._df: pd.DataFrame = pd.DataFrame(init_data)
+            elif isinstance(init_data, dict):
+                # it is important to wrap the dict in a list to avoid that more than 1 row is created
+                self._df: pd.DataFrame = pd.DataFrame([init_data])
+            else:
+                logger.error("Illegal initialization data for 'Data' class!")
+                self._df = None
+        else:
+            self._df = None
+
+    # end method definition
+
+    def __len__(self) -> int:
+        """Length of the embedded DataFrame object.
+           This is basically a convenience method.
+
+        Returns:
+            int: Length of the DataFrame
+        """
+
+        if self._df is not None:
+            return len(self._df)
+        return 0
+
+    # end method definition
+
+    def __str__(self) -> str:
+        """Print the DataFrame of the class.
+
+        Returns:
+            str: String representation.
+        """
+
+        # if the data frame is initialized we return
+        # the string representation of pd.DataFrame
+        if self._df is not None:
+            return str(self._df)
+
+        # don't call str(self) here - that would recurse into __str__:
+        return "None"
+
+    # end method definition
+
+    def lock(self):
+        """Return the threading lock object.
+ + Returns: + _type_: threading lock object + """ + return self._lock + + # end method definition + + def get_data_frame(self) -> pd.DataFrame: + """Get the Pandas DataFrame object + + Returns: + pd.DataFrame: Pandas DataFrame object + """ + + return self._df + + # end method definition + + def set_data_frame(self, df: pd.DataFrame): + """Set the Pandas DataFrame object + + Args: + df (pd.DataFrame): Pandas DataFrame object + """ + + self._df = df + + # end method definition + + def print_info( + self, + show_size: bool = True, + show_info: bool = False, + show_columns: bool = False, + show_first: bool = False, + show_last: bool = False, + show_sample: bool = False, + show_statistics: bool = False, + row_num: int = 10, + ): + """Log information about the data frame + + Args: + show_size (bool, optional): Show size of data frame. Defaults to True. + show_info (bool, optional): Show information for data frame. Defaults to False. + show_columns (bool, optional): Show columns of data frame. Defaults to False. + show_first (bool, optional): Show first 10 items. Defaults to False. + show_last (bool, optional): Show last 10 items. Defaults to False. + show_sample (bool, optional): Show 10 sample items. Defaults to False. + show_statistics (bool, optional): Show data frame statistics. Defaults to False. + """ + + if self._df is None: + logger.warning("Data Frame is not initialized!") + return + + if show_size: + logger.info( + "Data Frame has %s row(s) and %s column(s)", + self._df.shape[0], + self._df.shape[1], + ) + + if show_info: + # df.info() can not easily be embedded into a string + self._df.info() + + if show_columns: + logger.info("Columns:\n%s", self._df.columns) + logger.info( + "Columns with number of null values:\n%s", self._df.isnull().sum() + ) + logger.info( + "Columns with number of non-null values:\n%s", self._df.notnull().sum() + ) + logger.info("Columns with number of NaN values:\n%s", self._df.isna().sum()) + logger.info( + "Columns with number of non-NaN values:\n%s", self._df.notna().sum() + ) + + if show_first: + # the default for head is n = 5: + logger.info("First %s rows:\n%s", str(row_num), self._df.head(row_num)) + + if show_last: + # the default for tail is n = 5: + logger.info("Last %s rows:\n%s", str(row_num), self._df.tail(row_num)) + + if show_sample: + # the default for sample is n = 1: + logger.info("%s Sample rows:\n%s", str(row_num), self._df.sample(n=row_num)) + + if show_statistics: + logger.info( + "Description of statistics for data frame:\n%s", self._df.describe() + ) + logger.info( + "Description of statistics for data frame (Transformed):\n%s", + self._df.describe().T, + ) + logger.info( + "Description of statistics for data frame (objects):\n%s", + self._df.describe(include="object"), + ) + + # end method definition + + def append(self, add_data: pd.DataFrame | list | dict) -> bool: + """Append additional data to the data frame. + + Args: + add_data (pd.DataFrame | list | dict): Additional data. Can be pd.DataFrame or list of dicts (or Data) + + Returns: + bool: True = Success, False = Error + """ + + # Does the data frame has already content? + # Then we need to concat / append. 
Otherwise
+        # we just initialize self._df
+        if self._df is not None:
+            if isinstance(add_data, pd.DataFrame):
+                self._df = pd.concat([self._df, add_data], ignore_index=True)
+                return True
+            elif isinstance(add_data, Data):
+                df = add_data.get_data_frame()
+                # don't test a DataFrame for truth directly - for pandas this
+                # is ambiguous and raises a ValueError; test for None instead:
+                if df is not None:
+                    self._df = pd.concat([self._df, df], ignore_index=True)
+                return True
+            elif isinstance(add_data, list):
+                if add_data:
+                    df = Data(add_data)
+                    self._df = pd.concat(
+                        [self._df, df.get_data_frame()], ignore_index=True
+                    )
+                return True
+            elif isinstance(add_data, dict):
+                if add_data:
+                    # it is important to wrap the dict in a list to avoid that more than 1 row is created
+                    df = Data([add_data])
+                    self._df = pd.concat(
+                        [self._df, df.get_data_frame()], ignore_index=True
+                    )
+                return True
+            else:
+                logger.error("Illegal data type -> '%s'", type(add_data))
+                return False
+        else:  # self._df is None (initial state)
+            if isinstance(add_data, pd.DataFrame):
+                self._df = add_data
+                return True
+            elif isinstance(add_data, Data):
+                self._df = add_data.get_data_frame()
+                return True
+            elif isinstance(add_data, list):
+                self._df = pd.DataFrame(add_data)
+                return True
+            elif isinstance(add_data, dict):
+                # it is important to wrap the dict in a list to avoid that more than 1 row is created
+                self._df = pd.DataFrame([add_data])
+                return True
+            else:
+                logger.error("Illegal data type -> '%s'", type(add_data))
+                return False
+
+    # end method definition
+
+    def load_json_data(self, json_path: str, convert_dates: bool = False) -> bool:
+        """Load JSON data into the DataFrame
+
+        Args:
+            json_path (str): Path to the JSON file.
+            convert_dates (bool, optional): whether or not dates should be converted
+        Returns:
+            bool: False in case an error occurred, True otherwise.
+        """
+
+        if json_path is not None and os.path.exists(json_path):
+            # Load data from JSON file
+            try:
+                df = pd.read_json(path_or_buf=json_path, convert_dates=convert_dates)
+                if self._df is None:
+                    self._df = df
+                else:
+                    self._df = pd.concat([self._df, df])
+                logger.info(
+                    "After loading -> '%s' the Data Frame has %s row(s) and %s column(s)",
+                    json_path,
+                    self._df.shape[0],
+                    self._df.shape[1],
+                )
+            except FileNotFoundError:
+                logger.error(
+                    "File -> '%s' not found. Please check the file path.", json_path
+                )
+                return False
+            except PermissionError:
+                logger.error("Permission denied to access the file -> '%s'.", json_path)
+                return False
+            except IOError as e:
+                logger.error("An I/O error occurred -> %s", str(e))
+                return False
+            except json.JSONDecodeError as e:
+                logger.error("Error: Unable to decode JSON -> %s", str(e))
+                return False
+            except ValueError as e:
+                logger.error("Invalid JSON input -> %s", str(e))
+                return False
+            except AttributeError as e:
+                logger.error("Unexpected data structure -> %s", str(e))
+                return False
+            except TypeError as e:
+                logger.error("Unexpected data type -> %s", str(e))
+                return False
+            except KeyError as e:
+                logger.error("Missing key in JSON data -> %s", str(e))
+                return False
+
+        else:
+            logger.error(
+                "Missing JSON file - you have not specified a valid path -> %s.",
+                json_path,
+            )
+            return False
+        return True
+
+    # end method definition
+
+    def save_json_data(
+        self, json_path: str, orient: str = "records", preserve_index: bool = False
+    ) -> bool:
+        """Save JSON data from the DataFrame to a file
+
+        Args:
+            json_path (str): Path to the JSON file.
+            orient (str, optional): Structure of the JSON output, e.g. "records",
+                                    "columns", "index", "table", or "split".
+            preserve_index (bool, optional): Whether the index should be written
+                                             (only supported for some orients).
+        Returns:
+            bool: False in case an error occurred, True otherwise.
+ """ + + if json_path is not None and os.path.exists(os.path.dirname(json_path)): + # Load data from JSON file + try: + if self._df is not None: + # index parameter is only allowed if orient has one of the following values: + if ( + orient == "columns" + or orient == "index" + or orient == "table" + or orient == "split" + ): + self._df.to_json( + path_or_buf=json_path, + index=preserve_index, + orient=orient, + indent=2, + ) + else: + self._df.to_json(path_or_buf=json_path, orient=orient, indent=2) + else: + logger.warning("Data Frame is empty. Cannot write it to JSON") + return False + except FileNotFoundError: + logger.error( + "File -> '%s' not found. Please check the file path.", json_path + ) + return False + except PermissionError: + logger.error("Permission denied to access the file -> '%s'.", json_path) + return False + except IOError as e: + logger.error("An I/O error occurred -> %s", str(e)) + return False + except ValueError as e: + logger.error("Value Error -> %s", str(e)) + return False + + else: + logger.error( + "Missing JSON file -> '%s' you have not specified a valid path!", + json_path, + ) + return False + return True + + # end method definition + + def load_excel_data( + self, + xlsx_path: str, + sheet_names: str | list | None = 0, + usecols: str | list | None = None, + skip_rows: int | None = None, + header: int | None = 0, + names: list | None = None, + na_values: list | None = None, + ) -> bool: + """Load Excel (xlsx) data into DataFrame. Supports xls, xlsx, xlsm, xlsb, odf, ods and odt file extensions + read from a local filesystem or URL. Supports an option to read a single sheet or a list of sheets. + + Args: + xlsx_path (str): Path to the Excel file. + sheet_names (list | str | int, optional): Name or Index of the sheet in the Excel workbook to load. + If 'None' then all sheets will be loaded. + If 0 then first sheet in workbook will be loaded (this is the Default) + If string then this is interpreted as the name of the sheet to load. + If a list is passed, this can be a list of index values (int) or + a list of strings with the sheet names to load. + usecols (list | str, optional): List of columns to load, specified by general column names in Excel, + e.g. usecols='B:D', usecols=['A', 'C', 'F'] + skip_rows (int, optional): List of rows to skip on top of the sheet (e.g. to not read headlines) + header (int | None, optional): Excel Row (0-indexed) to use for the column labels of the parsed DataFrame. + If file contains no header row, then you should explicitly pass header=None. + Default is 0. + names (list): List of column names to use. Default is None + na_values (list, optional): List of values in the Excel that should become the Pandas NA value. + Returns: + bool: False in case an error occured, True otherwise. + """ + + if xlsx_path is not None and os.path.exists(xlsx_path): + # Load data from Excel file + try: + df = pd.read_excel( + io=xlsx_path, + sheet_name=sheet_names, + usecols=usecols, + skiprows=skip_rows, + header=header, + names=names, + na_values=na_values, + ) + # if multiple sheets from an Excel workbook are loaded, + # then read_excel() returns a dictionary. The keys are + # the names of the sheets and the values are the Data Frames. 
+ # we handle this case as follows: + if isinstance(df, dict): + logger.info("Loading multiple Excel sheets from the workbook!") + multi_sheet_df = pd.DataFrame() + for sheet in df.keys(): + multi_sheet_df = pd.concat( + [multi_sheet_df, df[sheet]], ignore_index=True + ) + df = multi_sheet_df + if self._df is None: + self._df = df + else: + self._df = pd.concat([self._df, df], ignore_index=True) + except FileNotFoundError: + logger.error( + "File -> '%s' not found. Please check the file path.", xlsx_path + ) + return False + except PermissionError: + logger.error("Permission denied to access the file -> '%s'.", xlsx_path) + return False + except IOError as e: + logger.error("An I/O error occurred -> %s", str(e)) + return False + except ValueError as e: + logger.error("Invalid Excel input -> %s", str(e)) + return False + except AttributeError as e: + logger.error("Unexpected data structure -> %s", str(e)) + return False + except TypeError as e: + logger.error("Unexpected data type -> %s", str(e)) + return False + except KeyError as e: + logger.error("Missing key in Excel data -> %s", str(e)) + return False + + else: + logger.error( + "Missing Excel file -> '%s' you have not specified a valid path!", + xlsx_path, + ) + return False + return True + + # end method definition + + def save_excel_data( + self, excel_path: str, sheet_name: str = "Pandas Export", index: bool = False + ) -> bool: + """ + Save the DataFrame to an Excel file, with robust error handling and logging. + + Args: + excel_path (str): The file path to save the Excel file. + sheet_name (str): The sheet name where data will be saved. Default is 'Sheet1'. + index: Whether to write the row names (index). Default is False. + """ + try: + # Check if the directory exists + directory = os.path.dirname(excel_path) + if directory and not os.path.exists(directory): + raise FileNotFoundError( + "The directory -> '%s' does not exist." % directory + ) + + # Attempt to save the DataFrame to Excel + self._df.to_excel(excel_path, sheet_name=sheet_name, index=index) + logger.info("Data saved successfully to -> %s", excel_path) + + except FileNotFoundError as e: + logger.error("Error: %s", e) + return False + except PermissionError: + logger.error( + "Error: Permission denied. You do not have permission to write to '%s'.", + excel_path, + ) + return False + except ValueError as ve: + logger.error("Error: Invalid data for Excel format -> %s", ve) + return False + except OSError as oe: + logger.error("Error: OS error occurred while saving file -> %s", oe) + return False + except Exception as e: + # Catch-all for any other unexpected errors + logger.error("An unexpected error occurred -> %s", e) + return False + + return True + + # end method definition + + def load_csv_data(self, csv_path: str) -> bool: + """Load CSV (Comma separated values) data into DataFrame + + Args: + csv_path (str): Path to the CSV file. + Returns: + bool: False in case an error occured, True otherwise. + """ + + if csv_path is not None and os.path.exists(csv_path): + # Load data from CSV file + try: + df = pd.read_csv(csv_path) + if self._df is None: + self._df = df + else: + self._df = pd.concat([self._df, df]) + except FileNotFoundError: + logger.error( + "File -> '%s' not found. 
Please check the file path.", csv_path
+                )
+                return False
+            except PermissionError:
+                logger.error("Permission denied to access the file -> %s.", csv_path)
+                return False
+            except IOError as e:
+                logger.error("An I/O error occurred -> %s", str(e))
+                return False
+            except ValueError as e:
+                logger.error("Invalid CSV input -> %s", str(e))
+                return False
+            except AttributeError as e:
+                logger.error("Unexpected data structure -> %s", str(e))
+                return False
+            except TypeError as e:
+                logger.error("Unexpected data type -> %s", str(e))
+                return False
+            except KeyError as e:
+                logger.error("Missing key in CSV data -> %s", str(e))
+                return False
+
+        else:
+            logger.error(
+                "Missing CSV file -> '%s' - you have not specified a valid path!",
+                csv_path,
+            )
+            return False
+        return True
+
+    # end method definition
+
+    def load_xml_data(
+        self, xml_path: str, xpath: str | None = None, xslt_path: str | None = None
+    ) -> bool:
+        """Load XML data into the DataFrame
+
+        Args:
+            xml_path (str): Path to the XML file.
+            xpath (str, optional): XPath to the elements we want to select
+            xslt_path (str, optional): XSLT transformation file
+        Returns:
+            bool: False in case an error occurred, True otherwise.
+        """
+
+        try:
+            df = pd.read_xml(path_or_buffer=xml_path, xpath=xpath, stylesheet=xslt_path)
+            # Process the loaded data as needed
+            if self._df is None:
+                self._df = df
+            else:
+                self._df = pd.concat([self._df, df])
+            logger.info("XML file loaded successfully!")
+            return True
+        except FileNotFoundError:
+            logger.error("File -> '%s' not found. Please check the file path.", xml_path)
+            return False
+        except PermissionError:
+            logger.error("Permission denied to access the file -> %s.", xml_path)
+            return False
+        except IOError as e:
+            logger.error("An I/O error occurred -> %s", str(e))
+            return False
+        except ValueError as e:
+            logger.error("Invalid XML input -> %s", str(e))
+            return False
+        except AttributeError as e:
+            logger.error("Unexpected data structure -> %s", str(e))
+            return False
+        except TypeError as e:
+            logger.error("Unexpected data type -> %s", str(e))
+            return False
+        except KeyError as e:
+            logger.error("Missing key in XML data -> %s", str(e))
+            return False
+
+    # end method definition
+
+    def load_directory(self, path_to_root: str) -> bool:
+        """Load a directory structure into the Pandas Data Frame
+
+        Args:
+            path_to_root (str): Path to the root element of the
+                                directory structure
+
+        Returns:
+            bool: True = Success, False = Failure
+        """
+
+        try:
+            # Check if the provided path is a directory
+            if not os.path.isdir(path_to_root):
+                logger.error(
+                    "The provided path -> '%s' is not a valid directory.", path_to_root
+                )
+                return False
+
+            # Initialize a list to hold file information
+            data = []
+
+            # Walk through the directory
+            for root, _, files in os.walk(path_to_root):
+                for file in files:
+                    file_path = os.path.join(root, file)
+                    file_size = os.path.getsize(file_path)
+                    relative_path = os.path.relpath(file_path, path_to_root)
+                    path_parts = relative_path.split(os.sep)
+
+                    # Create a dictionary with the path parts and file details
+                    entry = {
+                        "level {}".format(i): part
+                        for i, part in enumerate(path_parts[:-1], start=1)
+                    }
+                    entry.update({"filename": path_parts[-1], "size": file_size})
+                    data.append(entry)
+
+            # Create DataFrame from list of dictionaries
+            self._df = pd.DataFrame(data)
+
+            # Determine the maximum number of levels
+            max_levels = max((len(entry) - 2 for entry in data), default=0)
+
+            # Ensure all entries have the same number of levels
+            for entry in data:
+                for i in range(1, max_levels + 1):
+                    entry.setdefault("level {}".format(i), "")
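+
+            # e.g. a file "HR/2024/report.pdf" (hypothetical) yields the row:
+            #   {"level 1": "HR", "level 2": "2024",
+            #    "filename": "report.pdf", "size": 1234}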
+
+            # Convert to DataFrame again to make sure all columns are consistent
+            self._df = pd.DataFrame(data)
+
+        except NotADirectoryError as nde:
+            logger.error("Error -> %s", str(nde))
+            return False
+        except FileNotFoundError as fnfe:
+            logger.error("Error -> %s", str(fnfe))
+            return False
+        except PermissionError as pe:
+            logger.error("Error -> %s", str(pe))
+            return False
+
+        return True
+
+    # end method definition
+
+    def load_xml_directory(self, path_to_root: str, xpath: str | None = None) -> bool:
+        """Load XML files from a directory structure into the Pandas Data Frame.
+           Only files named "docovw.xml" are loaded.
+
+        Args:
+            path_to_root (str): Path to the root element of the
+                                directory structure
+            xpath (str, optional): XPath to the elements we want to select
+
+        Returns:
+            bool: True = Success, False = Failure
+        """
+
+        try:
+            # Check if the provided path is a directory
+            if not os.path.isdir(path_to_root):
+                logger.error(
+                    "The provided path -> '%s' is not a valid directory.", path_to_root
+                )
+                return False
+
+            # Walk through the directory
+            for root, _, files in os.walk(path_to_root):
+                for file in files:
+                    file_path = os.path.join(root, file)
+                    file_size = os.path.getsize(file_path)
+                    file_name = os.path.basename(file_path)
+
+                    if file_name == "docovw.xml":
+                        logger.info(
+                            "Load XML file -> '%s' of size -> %s", file_path, file_size
+                        )
+                        success = self.load_xml_data(file_path, xpath=xpath)
+                        if success:
+                            logger.info(
+                                "Successfully loaded XML file -> '%s'", file_path
+                            )
+
+        except NotADirectoryError as nde:
+            logger.error("Error -> %s", str(nde))
+            return False
+        except FileNotFoundError as fnfe:
+            logger.error("Error -> %s", str(fnfe))
+            return False
+        except PermissionError as pe:
+            logger.error("Error -> %s", str(pe))
+            return False
+
+        return True
+
+    # end method definition
+
+    def partitionate(self, number: int) -> list:
+        """Partition a data frame into equally sized partitions.
+
+        Args:
+            number (int): Number of partitions
+
+        Returns:
+            list: List of partitions
+        """
+
+        # Calculate the approximate size of each partition
+        size = len(self._df)
+
+        if size >= number:
+            partition_size = size // number
+            remainder = size % number
+        else:
+            partition_size = size
+            number = 1
+            remainder = 0
+
+        logger.info(
+            "Data set has -> %s elements. We split it into -> %s partitions with -> %s rows and remainder -> %s...",
+            str(size),
+            str(number),
+            str(partition_size),
+            str(remainder),
+        )
+
+        # Initialize a list to store partitions
+        partitions = []
+        start_index = 0
+
+        # Slice the DataFrame into equally sized partitions; the first
+        # 'remainder' partitions get one extra row each:
+        for i in range(number):
+            # Calculate the end index for this partition
+            end_index = start_index + partition_size + (1 if i < remainder else 0)
+            partition = self._df.iloc[start_index:end_index]
+            partitions.append(partition)
+            start_index = end_index
+
+        return partitions
+
+    # end method definition
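+
+    # Usage sketch (hypothetical sizes): splitting a data frame with 1,000
+    # rows into 4 partitions yields 4 data frames with 250 rows each, e.g.
+    # to feed parallel workers:
+    #
+    #   partitions = data.partitionate(4)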
+
+    def partitionate_by_column(self, column_name: str) -> list | None:
+        """Partition a data frame based on equal values in a specified column.
+
+        Args:
+            column_name (str): The column name to partition by
+
+        Returns:
+            list | None: List of partitions or None in case of an error.
+        """
+
+        if column_name not in self._df.columns:
+            logger.error(
+                "Column -> '%s' does not exist in the Data Frame. Data Frame has these columns -> %s",
+                column_name,
+                str(self._df.columns),
+            )
+            return None
+
+        # Separate rows with NaN or None values in the specified column
+        nan_partitions = self._df[self._df[column_name].isna()]
+        non_nan_df = self._df.dropna(subset=[column_name])
+
+        # Group by the specified column and create a list of DataFrames for each group
+        grouped = non_nan_df.groupby(column_name)
+        partitions = [group for _, group in grouped]
+
+        # Add each row with NaN or None values as its own partition
+        for i in range(len(nan_partitions)):
+            partitions.append(nan_partitions.iloc[[i]])
+
+        logger.info(
+            "Data Frame has been partitioned into -> %s partitions based on the values in column -> '%s'.",
+            str(len(partitions)),
+            column_name,
+        )
+
+        return partitions
+
+    # end method definition
+
+    def deduplicate(self, unique_fields: list, inplace: bool = True) -> pd.DataFrame:
+        """Remove duplicate rows that have all fields in
+           unique_fields in common.
+
+        Args:
+            unique_fields (list): Defines the fields for which we want a unique
+                                  combination.
+            inplace (bool, optional): True if the deduplication happens in-place.
+                                      Defaults to True.
+        Returns:
+            pd.DataFrame: If inplace is False then a new deduplicated DataFrame
+                          is returned. Otherwise the object is modified in place
+                          and self._df is returned.
+        """
+
+        if inplace:
+            self._df.drop_duplicates(subset=unique_fields, inplace=True)
+            self._df.reset_index(drop=True, inplace=True)
+            return self._df
+        else:
+            df = self._df.drop_duplicates(subset=unique_fields, inplace=False)
+            df = df.reset_index(drop=True, inplace=False)
+            return df
+
+    # end method definition
+
+    def sort(self, sort_fields: list, inplace: bool = True) -> pd.DataFrame:
+        """Sort the data frame based on one or multiple fields -
+           either in place or returned as a new data frame (i.e. not modifying self._df)
+
+        Args:
+            sort_fields (list): Columns / fields to be used for sorting
+            inplace (bool, optional): If the sorting should be inplace, i.e. modifying self._df.
+                                      Defaults to True.
+        Returns:
+            pd.DataFrame: New DataFrame (if inplace = False) or self._df (if inplace = True)
+        """
+
+        if self._df is None:
+            return None
+
+        if not all(sort_field in self._df.columns for sort_field in sort_fields):
+            logger.warning(
+                "Not all of the given sort fields -> %s do exist in the Data Frame.",
+                str(sort_fields),
+            )
+            # Reduce the sort fields to those that really exist in the DataFrame:
+            sort_fields = [
+                sort_field
+                for sort_field in sort_fields
+                if sort_field in self._df.columns
+            ]
+            logger.warning(
+                "Only these given sort fields -> %s do exist as columns in the Data Frame.",
+                str(sort_fields),
+            )
+
+        if inplace:
+            self._df.sort_values(by=sort_fields, inplace=True)
+            self._df.reset_index(drop=True, inplace=True)
+            return self._df
+        else:
+            df = self._df.sort_values(by=sort_fields, inplace=False)
+            df = df.reset_index(drop=True, inplace=False)
+            return df
+
+    # end method definition
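+
+    # Usage sketch (hypothetical column names):
+    #
+    #   data.deduplicate(unique_fields=["email"])  # drop rows with repeated e-mails
+    #   data.sort(sort_fields=["lastName"])        # then sort in place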
+            # In the lambda function x is a dictionary that includes the subvalues
+            # and it returns the value of the given flatten field
+            # (if it exists, otherwise None). So x is self._df[parent_field], i.e.
+            # what the lambda function gets 'applied' on.
+            self._df[flat_field] = self._df[parent_field].apply(
+                lambda x, sub_field=flatten_field: (
+                    x.get(sub_field, None) if isinstance(x, dict) else None
+                )
+            )
+
+    # end method definition
+
+    def explode_and_flatten(
+        self,
+        explode_field: str | list,
+        flatten_fields: list | None = None,
+        make_unique: bool = False,
+        reset_index: bool = False,
+        split_string_to_list: bool = False,
+    ) -> pd.DataFrame:
+        """Explode a substructure in the Data Frame
+
+        Args:
+            explode_field (str | list): Field(s) to explode which each has/have a list structure.
+                                        Exploding multiple columns at once is possible. This delivers
+                                        a very different result compared to exploding one column after
+                                        the other!
+            flatten_fields (list): Fields in the exploded substructure to include
+                                   in the main dictionaries for easier processing.
+            make_unique (bool, optional): if True deduplicate the exploded data frame.
+            reset_index (bool, optional): if True reset the index of the exploded data frame.
+            split_string_to_list (bool, optional): if True convert delimiter-separated strings
+                                                   in the explode field(s) to lists first.
+        Returns:
+            pd.DataFrame: Pointer to the Pandas DataFrame
+        """
+
+        def update_column(row):
+            # 'sub' is assigned below (in the handling of dotted field
+            # names) before this helper gets applied:
+            try:
+                if sub in row:
+                    return row[sub]
+            except (IndexError, KeyError, ValueError):
+                return ""
+            return None
+
+        # Define a function to split a string into a list
+        def string_to_list(string: str | None) -> list:
+            if not string or pd.isna(string):
+                return []
+            # Use regular expression to split by comma, semicolon, or comma followed by space
+            return re.split(r"[;,]\s*", str(string))
+
+        if isinstance(explode_field, list):
+            logger.info("Explode multiple columns -> %s", str(explode_field))
+        elif isinstance(explode_field, str):
+            logger.info("Explode single column -> '%s'", explode_field)
+        else:
+            logger.error(
+                "Illegal explode field(s) data type provided -> %s", type(explode_field)
+            )
+            return self._df
+
+        if split_string_to_list:
+            # Apply the function to convert the 'string_column' values to lists
+            self._df[explode_field] = self._df[explode_field].apply(string_to_list)
+
+        try:
+            # remove the sub dictionary that sometimes is introduced by
+            # XML loading
+            if "." in explode_field:
+                main = explode_field.split(".")[0]
+                sub = explode_field.split(".")[1]
+                self._df[main] = self._df[main].apply(update_column)
+                explode_field = main
+            # Explode the field that has list values
+            self._df = self._df.explode(column=explode_field)
+        except KeyError:
+            logger.error("Column -> '%s' not found in Data Frame!", str(explode_field))
+        except ValueError:
+            logger.error(
+                "Unable to explode the specified column -> '%s'!", str(explode_field)
+            )
+
+        if flatten_fields:
+            self.flatten(parent_field=explode_field, flatten_fields=flatten_fields)
+
+        if make_unique:
+            self._df.drop_duplicates(subset=flatten_fields, inplace=True)
+
+        if reset_index:
+            self._df.reset_index(inplace=True)
+
+        return self._df
+
+    # end method definition
+
+    def drop_columns(self, column_names: list, inplace: bool = True) -> pd.DataFrame:
+        """Drop selected columns from the Data Frame
+
+        Args:
+            column_names (list): list of column names to drop.
+            inplace (bool, optional): If the dropping should be inplace, i.e. modifying self._df.
+                                      Defaults to True.
+ Returns: + pd.DataFrame: New DataFrame (if inplace = False) or self._df (if inplace = True) + """ + + if not all(column_name in self._df.columns for column_name in column_names): + # Reduce the column names to those that really exist in the DataFrame: + column_names = [ + column_name + for column_name in column_names + if column_name in self._df.columns + ] + logger.warning( + "Reduce to these columns -> %s that do exist in the Data Frame.", + str(column_names), + ) + + if inplace: + self._df.drop(column_names, axis=1, inplace=True) + return self._df + else: + df = self._df.drop(column_names, axis=1, inplace=False) + return df + + # end method definition + + def keep_columns(self, column_names: list, inplace: bool = True) -> pd.DataFrame: + """Keep only selected columns from the Data Frame. Drop the rest. + + Args: + column_names (list): list of column names to keep. + inplace (bool, optional): If the keeping should be inplace, i.e. modifying self._df. + Defaults to True. + Returns: + pd.DataFrame: New DataFrame (if inplace = False) or self._df (if inplace = True) + """ + + if not all(column_name in self._df.columns for column_name in column_names): + # Reduce the column names to those that really exist in the DataFrame: + column_names = [ + column_name + for column_name in column_names + if column_name in self._df.columns + ] + logger.warning( + "Reduce to these columns -> %s that do exist in the Data Frame.", + column_names, + ) + + if inplace: + # keep only those columns which are in column_names: + if column_names != []: + self._df = self._df[column_names] + return self._df + else: + # keep only those columns which are in column_names: + if column_names != []: + df = self._df[column_names] + return df + return None + + # end method definition + + def cleanse(self, cleansings: dict): + """Cleanse data with regular expressions and upper/lower case conversion. + + Args: + cleansings (dict): Dictionary with keys that equal the column names. + The dictionary values are dictionaries itself with + these fields: + * replacements (dict): name of a column in the data frame + * upper (bool): change the value to uppercase + * lower (bool): change the value to lowercase + Example: + cleansings = { + "airportName": { + "upper": true + "replacements" : { + "-": " ", # replace hypen with space + ",\s*": " ", # remove commas followed by on or more spaces with a single space + "\s+$": "", # remove trailing spaces at the end of the name + "^\s+": "", # remove spaces at the beginning of the name + } + "length": 10 + } + "airportId": { + "upper": true + "replacements" : { + "K(.{3})": "\1", # if the airport has 4 charters and starts with a 'K' we remove the 'K' + "\/": "", # remove forward slashes - this helps to have consistency with N/A, NA, n/a, na + } + } + } + """ + + # Iterate over each column in regex_dict + for column, cleansing in cleansings.items(): + # "colum" is the name of the field we want to cleanse. + # "cleansing" is a dict with + if "." 
in column: + # Handle columns with subfields + main_field, sub_field = column.split(".") + if not main_field in self._df.columns: + continue + # we use the additional parameters for lambda (beside x) + # to avoid linter warning W0640 + self._df[main_field] = self._df[main_field].apply( + lambda x, sub_field=sub_field, cleansing=cleansing: self._cleanse_subfield( + data=x, + sub_field=sub_field, + replacements=cleansing.get("replacements", {}), + upper=cleansing.get("upper", False), + lower=cleansing.get("lower", False), + length=cleansing.get("length", 0), + ) + ) + else: + if not column in self._df.columns: + continue + + logger.debug("\nBEFORE:\n%s\n", self._df[column]) + + if cleansing.get("upper", False) and self._df[column].dtype == "object": + self._df[column] = self._df[column].str.upper() + if cleansing.get("lower", False) and self._df[column].dtype == "object": + self._df[column] = self._df[column].str.lower() + + # Handle regular columns + for regex_pattern, replacement in cleansing.get( + "replacements", {} + ).items(): + # if replacement: + # \b is a word boundary anchor in regular expressions. + # It matches a position where one side is a word character + # (like a letter or digit) and the other side is a non-word character + # (like whitespace or punctuation). It's often used to match whole words. + # regex_pattern = rf"\b{regex_pattern}\b" + # self._df[column] = self._df[column].replace( + # regex=regex_pattern, value=replacement + # ) + self._df[column] = self._df[column].str.replace( + pat=regex_pattern, repl=replacement, regex=True + ) + + if ( + cleansing.get("length", 0) > 0 + and self._df[column].dtype == "object" + ): + self._df[column] = self._df[column].str.slice( + 0, cleansing["length"] + ) + + logger.debug("\nAFTER:\n%s\n", self._df[column]) + + # end method definition + + def _cleanse_subfield( + self, + data: list | dict, + sub_field: str, + replacements: dict, + upper: bool, + lower: bool, + length: int = 0, + ) -> list | dict: + """Helper function to cleanse subfield data + + Args: + data (list | dict): sub data - either a list of dictionaries or a dictionary + sub_field (str): defines which field in the sub data should be updated + regex_replacements (dict): Dictionary of regular expressions + upper (bool): if True transform value in subfield to upper-case + lower (bool): if True, transform value in subfield to lower-case + length (int, optional): maximum length of the strings + Returns: + list | dict: Updated data + """ + + if isinstance(data, list): + # If data is a list, apply cleansing to each dictionary in the list + for i, item in enumerate(data): + if ( + item is not None + and sub_field in item + and not pd.isnull(item[sub_field]) + ): + if upper: + item[sub_field] = item[sub_field].upper() + elif lower: + item[sub_field] = item[sub_field].lower() + for regex_pattern, replacement in replacements.items(): + if replacement: + regex_pattern = rf"\b{regex_pattern}\b" + item[sub_field] = re.sub( + regex_pattern, replacement, item[sub_field] + ) + if length > 0: + item[sub_field] = item[sub_field][:length] + data[i] = item + elif isinstance(data, dict): + # If data is a dictionary, apply cleansing directly to the subfield + if sub_field in data and not pd.isnull(data[sub_field]): + if upper: + data[sub_field] = data[sub_field].upper() + elif lower: + data[sub_field] = data[sub_field].lower() + for regex_pattern, replacement in replacements.items(): + if replacement: + regex_pattern = rf"\b{regex_pattern}\b" + data[sub_field] = re.sub( + regex_pattern, 
+                            replacement, data[sub_field]
+                        )
+                if length > 0:
+                    data[sub_field] = data[sub_field][:length]
+
+        return data
+
+    # end method definition
+
+    def filter(self, conditions: list, inplace: bool = True) -> pd.DataFrame:
+        """Filter the DataFrame based on (multiple) conditions.
+
+        Args:
+            conditions (list): Conditions are a list of dictionaries with 3 items:
+                               * field (str): name of a column in the data frame
+                               * value (str or list): expected value (filter criterion).
+                                                      If it is a list then one of
+                                                      the list elements must match the field value (OR)
+                               * regex (bool): this flag controls if the value is interpreted as a
+                                               regular expression. If there is no regex item in the
+                                               dictionary then the default is False (= value is NOT regex).
+                               If there are multiple conditions in the list each has to evaluate to True (AND)
+            inplace (bool, optional): Defines if the self._df is modified (inplace) or just
+                                      a new DataFrame is returned. Defaults to True.
+        Returns:
+            pd.DataFrame: new data frame or pointer to self._df (depending on the value of 'inplace')
+        """
+
+        if self._df is None:
+            logger.error("DataFrame is not initialized.")
+            return None
+
+        if self._df.empty:
+            logger.error("DataFrame is empty.")
+            return None
+
+        # first filtered_df is the full DataFrame.
+        # then it is subsequently reduced by each condition.
+        # at the end it is just those rows that match all conditions.
+        filtered_df = self._df
+
+        # We traverse a list of conditions. Each condition must evaluate to true
+        # otherwise the current workspace or document (i.e. the data set for these objects)
+        # will be skipped. The variable filtered_df is reduced condition by condition.
+        for condition in conditions:
+            field = condition.get("field", None)
+            if not field:
+                logger.error("Missing value for filter condition field in payload!")
+                continue
+            if field not in self._df.columns:
+                logger.warning(
+                    "Filter condition field -> %s does not exist as column in data frame! Data frame has these columns -> %s",
+                    field,
+                    str(self._df.columns),
+                )
+                continue  # Skip filtering for columns not present in DataFrame
+            value = condition.get("value", None)
+            # Only a missing value (None) is an error - empty strings,
+            # 0 or False are valid filter values:
+            if value is None:
+                logger.error(
+                    "Missing filter value for filter condition field -> '%s'!", field
+                )
+                continue
+            regex = condition.get("regex", False)
+
+            logger.info(
+                "Data Frame has %s row(s) and %s column(s) before filter -> %s has been applied.",
+                filtered_df.shape[0],
+                filtered_df.shape[1],
+                str(condition),
+            )
+
+            filtered_dfs = []
+
+            # if a single string is passed as value we put
+            # it into an 1-item list to simplify the following code:
+            if not isinstance(value, list):
+                value = [value]
+
+            # multiple values are treated like a logical "or" condition
+            for value_item in value:
+                if regex:
+                    filtered_dfs.append(
+                        filtered_df[
+                            ~filtered_df[field].isna()
+                            & filtered_df[field].str.contains(value_item, regex=True)
+                        ]
+                    )
+                else:
+                    result_df = filtered_df[
+                        ~filtered_df[field].isna() & filtered_df[field].eq(value_item)
+                    ]
+                    if not result_df.empty:
+                        filtered_dfs.append(result_df)
+            # end for values
+
+            if not filtered_dfs:
+                logger.warning(
+                    "Filter with field -> '%s' and value -> '%s' delivered an empty Data Frame",
+                    field,
+                    str(value),
+                )
+                filtered_df.drop(filtered_df.index, inplace=True)
+            else:
+                # Concatenate the filtered DataFrames for each value in the list
+                filtered_df = pd.concat(filtered_dfs, ignore_index=True)
+
+            logger.info(
+                "Data Frame has %s row(s) and %s column(s) after filter -> %s has been applied.",
+                filtered_df.shape[0],
+                filtered_df.shape[1],
+                str(condition),
+            )
+        # end for condition
+
+        if inplace:
+            self._df = filtered_df
+
+        return filtered_df
+
+    # end method definition
+
+    def fill_na_in_column(self, column_name: str, default_value: str | int):
+        """Replace NA values in a column with a defined new default value
+
+        Args:
+            column_name (str): name of the column in the DataFrame
+            default_value (str | int): value to replace NA with
+        """
+
+        if column_name in self._df.columns:
+            self._df[column_name] = self._df[column_name].fillna(value=default_value)
+        else:
+            logger.error(
+                "Cannot replace NA values as column -> '%s' does not exist in the Data Frame! Data Frame has these columns -> %s",
+                column_name,
+                str(self._df.columns),
+            )
+
+    # end method definition
+
+    def fill_forward(self, inplace: bool) -> pd.DataFrame:
+        """Fill the missing cells appropriately by carrying forward
+           the values from the previous rows where necessary.
+           This has applications if a hierarchy is represented by
+           nested cells e.g. in an Excel sheet.
+
+        Args:
+            inplace (bool): Should the modification happen inplace or not.
+
+        Returns:
+            pd.DataFrame: Resulting dataframe
+        """
+
+        # To convert an Excel representation of a folder structure with nested
+        # columns into a format appropriate for Pandas,
+        # where all cells should be filled.
+        # Note: ffill(inplace=True) returns None, so we must not return
+        # its result directly in the inplace case:
+        if inplace:
+            self._df.ffill(inplace=True)
+            return self._df
+
+        return self._df.ffill(inplace=False)
+
+    # end method definition
+
+    def lookup_value(
+        self, lookup_column: str, lookup_value: str, separator: str = "|"
+    ) -> pd.Series | None:
+        """Lookup a row that includes a lookup value in the value of a given column.
+
+        Args:
+            lookup_column (str): name of the column to search in
+            lookup_value (str): value to search for
+            separator (str): string list delimiter / separator
+
+        Returns:
+            pd.Series | None: data frame row that matches or None if no match was found.
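+
+        Example:
+            Assuming a (hypothetical) column "synonyms" where a row holds the
+            value "JFK | New York | NYC", the call
+            lookup_value(lookup_column="synonyms", lookup_value="NYC", separator="|")
+            returns that row as a pd.Series.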
+ """ + + # Use the `apply` function to filter rows where the lookup value matches a whole item in the comma-separated list + def match_lookup_value(string_list: str) -> bool: + """Spilt delimiter-separated list into a python list + + Args: + string_list (str): delimiter-separated string list like "a, b, c" or "a | b | c" + + Returns: + bool: True if lookup_value is equal to one of the delimiter-separated terms + """ + return lookup_value in [ + item.strip() for item in string_list.split(separator) + ] + + df = self._df + + if self._df is None: + return None + + if lookup_column not in self._df.columns: + logger.error( + "Column -> '%s' does not exist in the Data Frame! Data Frame has these columns -> %s", + lookup_column, + str(self._df.columns), + ) + return None + + # Fill NaN or None values in the lookup column with empty strings + df[lookup_column] = df[lookup_column].fillna("") + + # Use the `apply` function to filter rows where the lookup value is in the Synonyms list + matched_row = df[df[lookup_column].apply(match_lookup_value)] + + # Return the first matched row, if any + if not matched_row.empty: + return matched_row.iloc[0] + + return None + + # end method definition + + def add_column( + self, + source_column: str, + reg_exp: str, + new_column: str, + prefix="", + suffix="", + length: int | None = None, + group_chars: int | None = None, + group_separator: str = ".", + group_remove_leading_zero: bool = True, + ) -> bool: + """Add additional column to the data frame. + + Args: + source_column (str): name of the source column + reg_exp (str): regular expression to apply on the content of the source column + new_column (str): name of the column to add + prefix (str, optional): Prefix to add in front of the value. Defaults to "". + suffix (str, optional): Suffix to add at the end of the value. Defaults to "". + length (int | None, optional): Length to reduce to. Defaults to None. + group_chars (int | None, optional): group the resulting string in characters of group_chars. Defaults to None. + group_separator (str, optional): Separator string for the grouping. Defaults to ".". + group_remove_leading_zero (bool, optional): Remove leading zeros from the groups. Defaults to True. 
+ + Returns: + bool: True = Success, False = Failure + """ + + if self._df is None: + return False + + # Use str.extract to apply the regular expression to the source column + extracted = self._df[source_column].str.extract(pat=reg_exp, expand=False) + + # Limit the result to the specified length + if length is not None: + extracted = extracted.str[:length] + + if group_chars is not None: + + def process_grouping(x): + if pd.isna(x): + return x + # Split into groups + groups = [x[i : i + group_chars] for i in range(0, len(x), group_chars)] + if group_remove_leading_zero: + # Remove leading zeros from each group + groups = [group.lstrip("0") or "0" for group in groups] + # Join groups with separator + return group_separator.join(groups) + + extracted = extracted.apply(process_grouping) + + # Add prefix and suffix + if prefix or suffix: + extracted = prefix + extracted.astype(str) + suffix + + self._df[new_column] = extracted + + return True diff --git a/pyxecm/helper/web.py b/pyxecm/helper/web.py index 3f5f61f..defe647 100644 --- a/pyxecm/helper/web.py +++ b/pyxecm/helper/web.py @@ -20,6 +20,7 @@ import socket import time import requests +from lxml import html logger = logging.getLogger("pyxecm.web") @@ -47,7 +48,7 @@ def check_host_reachable(self, hostname: str, port: int = 80) -> bool: bool: True is reachable, False otherwise """ - logger.info( + logger.debug( "Test if host -> %s is reachable on port -> %s ...", hostname, str(port) ) try: @@ -67,7 +68,7 @@ def check_host_reachable(self, hostname: str, port: int = 80) -> bool: ) return False else: - logger.info("Host is reachable at -> %s:%s", hostname, str(port)) + logger.debug("Host is reachable at -> %s:%s", hostname, str(port)) return True # end method definition @@ -81,6 +82,8 @@ def http_request( timeout: int = 60, retries: int = 0, wait_time: int = 0, + wait_on_status: list | None = None, + show_error: bool = True, ): """Issues an http request to a given URL. @@ -93,6 +96,9 @@ def http_request( timeout (int, optional): timeout in seconds retries (int, optional): number of retries. If -1 then unlimited retries. wait_time (int, optional): number of seconds to wait after each try + wait_on_status (list, optional): list of status codes we want to wait on. 
If None
+                                          or empty then we wait for all return codes if
+                                          wait_time > 0
        Returns:
            Response of call
        """
@@ -100,61 +106,179 @@
        if not headers:
            headers = requestHeaders

-        logger.info(
-            "Make HTTP Request to URL -> %s using -> %s method with payload -> %s (max number of retries = %s)",
-            url,
-            method,
-            str(payload),
-            str(retries),
+        message = "Make HTTP Request to URL -> {} using -> {} method".format(
+            url, method
        )
+        if payload:
+            message += " with payload -> {}".format(payload)
+        if retries:
+            message += " (max number of retries -> {}, wait time between retries -> {})".format(
+                retries, wait_time
+            )
+        try:
+            retries = int(retries)
+        except ValueError:
+            logger.warning(
+                "HTTP request -> retries is not a valid integer value: %s, defaulting to 0 retries",
+                retries,
+            )
+            retries = 0
+
+        logger.debug(message)

        try_counter = 1

        while True:
-            response = requests.request(
-                method=method, url=url, data=payload, headers=headers, timeout=timeout
-            )
-
-            if not response.ok and retries == 0:
-                logger.error(
-                    "HTTP request -> %s to url -> %s failed; status -> %s; error -> %s",
-                    method,
-                    url,
-                    response.status_code,
-                    response.text,
+            try:
+                response = requests.request(
+                    method=method,
+                    url=url,
+                    data=payload,
+                    headers=headers,
+                    timeout=timeout,
                )
-                return response
-
-            elif response.ok:
-                logger.info(
-                    "HTTP request -> %s to url -> %s succeeded with status -> %s!",
-                    method,
-                    url,
-                    response.status_code,
-                )
-                if wait_time > 0:
-                    logger.info("Sleeping %s seconds...", wait_time)
-                    time.sleep(wait_time)
-                return response
-
-            else:
+                logger.debug("%s", response.text)
+            except Exception as exc:
+                response = None
                logger.warning(
-                    "HTTP request -> %s to url -> %s failed (try %s); status -> %s; error -> %s",
+                    "HTTP request -> %s to url -> %s failed (try %s); error -> %s",
                    method,
                    url,
                    try_counter,
-                    response.status_code,
-                    response.text,
+                    exc,
                )
-                if wait_time > 0:
-                    logger.warning(
-                        "Sleeping %s seconds and then trying once more...",
-                        str(wait_time),
+
+            # do we have an error and don't want to retry?
+            if response is not None:
+                # Do we have a successful result?
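+                # Note: even a successful status code can be "waited on" if it
+                # is listed in wait_on_status (e.g. wait_on_status=[202] to
+                # keep polling while an asynchronous operation is pending).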
+ if response.ok: + logger.debug( + "HTTP request -> %s to url -> %s succeeded with status -> %s!", + method, + url, + response.status_code, ) - time.sleep(wait_time) - else: - logger.warning("Trying once more...") - retries -= 1 - try_counter += 1 + + if wait_on_status and response.status_code in wait_on_status: + logger.debug( + "%s is in wait_on_status list: %s", + response.status_code, + wait_on_status, + ) + else: + return response + + elif not response.ok: + message = "HTTP request -> {} to url -> {} failed; status -> {}; error -> {}".format( + method, + url, + response.status_code, + ( + response.text + if response.headers.get("content-type") + == "application/json" + else "see debug log" + ), + ) + if show_error and retries == 0: + logger.error(message) + else: + logger.warning(message) + + # Check if another retry is allowed, if not return None + if retries == 0: + return None + + if wait_time > 0: + logger.warning( + "Sleeping %s seconds and then trying once more...", + str(wait_time), + ) + time.sleep(wait_time) + + retries -= 1 + try_counter += 1 + + # end method definition + + def download_file( + self, + url: str, + filename: str, + timeout: int = 120, + retries: int = 0, + wait_time: int = 0, + wait_on_status: list | None = None, + show_error: bool = True, + ) -> bool: + """Download a file from a URL + + Args: + url (str): URL + filename (str): filename to save + timeout (int, optional): timeout in seconds + retries (int, optional): number of retries. If -1 then unlimited retries. + wait_time (int, optional): number of seconds to wait after each try + wait_on_status (list, optional): list of status codes we want to wait on. If None + or empty then we wait for all return codes if + wait_time > 0 + + Returns: + bool: True if successful, False otherwise + """ + + response = self.http_request( + url=url, + method="GET", + retries=retries, + timeout=timeout, + wait_time=wait_time, + wait_on_status=wait_on_status, + show_error=show_error, + ) + + if response is None: + return False + + if response.ok: + with open(filename, "wb") as f: + f.write(response.content) + logger.debug("File downloaded successfully as -> %s", filename) + return True + + return False # end method definition + + def extract_content(self, url: str, xpath: str) -> str | None: + """Extract a string from a response of a HTTP request + based on an XPath. + + Args: + url (str): URL to open + xpath (str): XPath expression to apply to the result + + Returns: + str | None: Extracted string or None in case of an error. 
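+
+        Example:
+            A (hypothetical) call like
+            extract_content(url="https://example.com", xpath="//h1")
+            returns the joined text content of all <h1> elements of the page.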
+ """ + + # Send a GET request to the URL + response = requests.get(url, timeout=None) + + # Check if request was successful + if response.status_code == 200: + # Parse the HTML content + tree = html.fromstring(response.content) + + # Extract content using XPath + elements = tree.xpath(xpath) + + # Get text content of all elements and join them + content = "\n".join([elem.text_content().strip() for elem in elements]) + + # Return the extracted content + return content + else: + # If request was not successful, print error message + logger.error(response.status_code) + return None diff --git a/pyxecm/helper/xml.py b/pyxecm/helper/xml.py index 9793019..daec8b8 100644 --- a/pyxecm/helper/xml.py +++ b/pyxecm/helper/xml.py @@ -3,6 +3,10 @@ Class: XML Methods: +load_xml_file: Load an XML file into a Python list of dictionaries +load_xml_files_from_directory: Load all XML files from a directory that matches defined file names + then using the XPath to identify a set of elements and convert them + into a Python list of dictionaries. get_xml_element: Retrieve an XML Element from a string using an XPath expression modify_xml_element: Update the text (= content) of an XML element search_setting: Search a JSON-like setting inside an XML text telement @@ -21,10 +25,12 @@ import logging import os import re +import fnmatch # we need lxml instead of stadard xml.etree to have xpath capabilities! from lxml import etree import xmltodict +import zipfile # import xml.etree.ElementTree as etree from pyxecm.helper.assoc import Assoc @@ -33,7 +39,129 @@ class XML: - """XML Class to parse and update Extended ECM transport packages""" + """XML Class to handle XML processing, e.g. to parse and update Extended ECM transport packages""" + + @classmethod + def load_xml_file( + cls, file_path: str, xpath: str, dir_name: str | None = None + ) -> list | None: + """Load an XML file into a Python list of dictionaries + + Args: + file_path (str): Path to XML file + xpath (str): XPath to select sub-elements + + Returns: + dict | None: _description_ + """ + + try: + + tree = etree.parse(file_path) + if not tree: + return [] + + # Perform the XPath query to select 'child' elements + elements = tree.xpath(xpath) # Adjust XPath as needed + + # Convert the selected elements to dictionaries + results = [] + tag = xpath.split("/")[-1] + for element in elements: + element_dict = xmltodict.parse(etree.tostring(element)) + if tag in element_dict: + element_dict = element_dict[tag] + if dir_name: + element_dict["directory"] = dir_name + results.append(element_dict) + + except IOError as e: + logger.error("IO Error -> %s", str(e)) + except etree.XMLSyntaxError as e: + logger.error("XML Syntax Error -> %s", str(e)) + except etree.DocumentInvalid as e: + logger.error("Document Invalid -> %s", str(e)) + + return results + + # end method definition + + @classmethod + def load_xml_files_from_directory( + cls, path_to_root: str, filenames: list | None, xpath: str | None = None + ) -> list | None: + """Load all XML files from a directory that matches defined file names + then using the XPath to identify a set of elements and convert them + into a Python list of dictionaries. + + Args: + path_to_root (str): Path to the root element of the + directory structure + filenames (list): list of filenames. If empty all filenames ending + with ".xml" are used. 
+            xpath (str, optional): XPath to the elements we want to select
+
+        Returns:
+            list | None: List of dictionaries or None in case of an invalid path
+        """
+
+        # Initialize the result list before the try block so it is
+        # defined even if an exception is raised during traversal:
+        results = []
+
+        try:
+            # Check if the provided path is a directory
+            if not os.path.isdir(path_to_root) and not path_to_root.endswith(".zip"):
+                logger.error(
+                    "The provided path '%s' is not a valid directory or Zip file.",
+                    path_to_root,
+                )
+                return None
+
+            if path_to_root.endswith(".zip"):
+                zip_file_folder = os.path.splitext(path_to_root)[0]
+                if not os.path.exists(zip_file_folder):
+                    logger.info(
+                        "Unzipping -> '%s' into folder -> '%s'...",
+                        path_to_root,
+                        zip_file_folder,
+                    )
+                    with zipfile.ZipFile(path_to_root, "r") as zfile:
+                        zfile.extractall(zip_file_folder)
+                else:
+                    logger.info(
+                        "Zip file is already extracted (path -> '%s' exists). Reusing extracted data...",
+                        zip_file_folder,
+                    )
+                path_to_root = zip_file_folder
+
+            # If no filename patterns are given we process all XML files
+            # (this implements the default documented above):
+            if not filenames:
+                filenames = ["*.xml"]
+
+            # Walk through the directory
+            for root, _, files in os.walk(path_to_root):
+                for file_data in files:
+                    file_path = os.path.join(root, file_data)
+                    file_size = os.path.getsize(file_path)
+                    file_name = os.path.basename(file_path)
+                    dir_name = os.path.dirname(file_path)
+
+                    if any(
+                        fnmatch.fnmatch(file_path, pattern) for pattern in filenames
+                    ) and file_name.endswith(".xml"):
+                        logger.info(
+                            "Load XML file -> '%s' of size -> %s", file_path, file_size
+                        )
+                        results += cls.load_xml_file(
+                            file_path, xpath=xpath, dir_name=dir_name
+                        )
+
+        except NotADirectoryError as nde:
+            logger.error("Error -> %s", str(nde))
+        except FileNotFoundError as fnfe:
+            logger.error("Error -> %s", str(fnfe))
+        except PermissionError as pe:
+            logger.error("Error -> %s", str(pe))
+
+        return results
+
+    # end method definition
@@ -55,6 +183,8 @@ def get_xml_element(cls, xml_content: str, xpath: str):

        return element

+    # end method definition
+
    @classmethod
    def modify_xml_element(cls, xml_content: str, xpath: str, new_value: str):
        """Update the text (= content) of an XML element
@@ -72,6 +202,8 @@
        else:
            logger.warning("XML Element -> %s not found.", xpath)

+    # end method definition
+
    @classmethod
    def search_setting(
        cls,
@@ -122,6 +254,8 @@
        else:
            return None

+    # end method definition
+
    @classmethod
    def replace_setting(
        cls,
@@ -170,6 +304,8 @@

        return new_text

+    # end method definition
+
    @classmethod
    def replace_in_xml_files(
        cls,
@@ -216,8 +352,8 @@
                    # if xpath is given we do an intelligent replacement
                    if xpath:
                        xml_modified = False
-                        logger.info("Replacement with xpath...")
-                        logger.info(
+                        logger.debug("Replacement with xpath...")
+                        logger.debug(
                            "XML path -> %s, setting -> %s, assoc element -> %s",
                            xpath,
                            setting,
@@ -225,17 +361,15 @@
                        )
                        tree = etree.parse(file_path)
                        if not tree:
-                            logger.erro(
-                                "Cannot parse XML tree -> {}. Skipping...".format(
-                                    file_path
-                                )
+                            logger.error(
+                                "Cannot parse XML tree -> %s. Skipping...", file_path
                            )
                            continue
                        root = tree.getroot()
                        # find the matching XML elements using the given XPath:
                        elements = root.xpath(xpath)
                        if not elements:
-                            logger.info(
+                            logger.debug(
                                "The XML file -> %s does not have any element with the given XML path -> %s.
Skipping...", file_path, xpath, @@ -243,7 +377,7 @@ def replace_in_xml_files( continue for element in elements: # as XPath returns a list - logger.info( + logger.debug( "Found XML element -> %s in file -> %s using xpath -> %s", element.tag, filename, @@ -251,7 +385,7 @@ def replace_in_xml_files( ) # the simple case: replace the complete text of the XML element if not setting and not assoc_elem: - logger.info( + logger.debug( "Replace complete text of XML element -> %s from -> %s to -> %s", xpath, element.text, @@ -261,7 +395,7 @@ def replace_in_xml_files( xml_modified = True # In this case we want to set a complete value of a setting (basically replacing a whole line) elif setting and not assoc_elem: - logger.info( + logger.debug( "Replace single setting -> %s in XML element -> %s with new value -> %s", setting, xpath, @@ -271,7 +405,7 @@ def replace_in_xml_files( element.text, setting, is_simple=True ) if setting_value: - logger.info( + logger.debug( "Found existing setting value -> %s", setting_value, ) @@ -290,7 +424,7 @@ def replace_in_xml_files( replace_setting = ( '"' + setting + '":"' + replace_string + '"' ) - logger.info( + logger.debug( "Replacement setting -> %s", replace_setting ) element.text = cls.replace_setting( @@ -308,7 +442,7 @@ def replace_in_xml_files( continue # in this case the text is just one assoc (no setting substructure) elif not setting and assoc_elem: - logger.info( + logger.debug( "Replace single Assoc value -> %s in XML element -> %s with -> %s", assoc_elem, xpath, @@ -322,13 +456,13 @@ def replace_in_xml_files( assoc_string=assoc_string ) logger.debug("Assoc Dict -> %s", str(assoc_dict)) - assoc_dict[ - assoc_elem - ] = replace_string # escaped_replace_string + assoc_dict[assoc_elem] = ( + replace_string # escaped_replace_string + ) assoc_string_new: str = Assoc.dict_to_string( assoc_dict=assoc_dict ) - logger.info( + logger.debug( "Replace assoc with -> %s", assoc_string_new ) element.text = assoc_string_new @@ -336,7 +470,7 @@ def replace_in_xml_files( xml_modified = True # In this case we have multiple settings with their own assocs elif setting and assoc_elem: - logger.info( + logger.debug( "Replace single Assoc value -> %s in setting -> %s in XML element -> %s with -> %s", assoc_elem, setting, @@ -347,7 +481,7 @@ def replace_in_xml_files( element.text, setting, is_simple=False ) if setting_value: - logger.info( + logger.debug( "Found setting value -> %s", setting_value ) assoc_string: str = Assoc.extract_assoc_string( @@ -361,13 +495,13 @@ def replace_in_xml_files( escaped_replace_string = replace_string.replace( "'", "\\\\\u0027" ) - logger.info( + logger.debug( "Escaped replacement string -> %s", escaped_replace_string, ) - assoc_dict[ - assoc_elem - ] = escaped_replace_string # escaped_replace_string + assoc_dict[assoc_elem] = ( + escaped_replace_string # escaped_replace_string + ) assoc_string_new: str = Assoc.dict_to_string( assoc_dict=assoc_dict ) @@ -378,7 +512,7 @@ def replace_in_xml_files( replace_setting = ( '"' + setting + '":"' + assoc_string_new + '"' ) - logger.info( + logger.debug( "Replacement setting -> %s", replace_setting ) # here we need to apply a "trick". It is required @@ -407,7 +541,7 @@ def replace_in_xml_files( ) continue if xml_modified: - logger.info( + logger.debug( "XML tree has been modified. 
Write updated file -> %s...", file_path, ) @@ -465,7 +599,7 @@ def replace_in_xml_files( found = True # this is not using xpath - do a simple search and replace else: - logger.info("Replacement without xpath...") + logger.debug("Replacement without xpath...") with open(file_path, "r", encoding="UTF-8") as f: contents = f.read() # Replace all occurrences of the search pattern with the replace string @@ -473,7 +607,7 @@ def replace_in_xml_files( # Write the updated contents to the file if there were replacements if contents != new_contents: - logger.info( + logger.debug( "Found search string -> %s in XML file -> %s. Write updated file...", search_pattern, file_path, @@ -485,6 +619,8 @@ def replace_in_xml_files( return found + # end method definition + @classmethod def extract_from_xml_files( cls, @@ -511,18 +647,18 @@ def extract_from_xml_files( # Read the contents of the file file_path = os.path.join(subdir, filename) - logger.info("Extraction with xpath -> %s...", xpath) + logger.debug("Extraction with xpath -> %s...", xpath) tree = etree.parse(file_path) if not tree: - logger.erro( - "Cannot parse XML tree -> {}. Skipping...".format(file_path) + logger.error( + "Cannot parse XML file -> '%s'. Skipping...", file_path ) continue root = tree.getroot() # find the matching XML elements using the given XPath: elements = root.xpath(xpath) if not elements: - logger.info( + logger.debug( "The XML file -> %s does not have any element with the given XML path -> %s. Skipping...", file_path, xpath, @@ -530,7 +666,7 @@ def extract_from_xml_files( continue for element in elements: # as XPath returns a list - logger.info( + logger.debug( "Found XML element -> %s in file -> %s using xpath -> %s. Add it to result list.", element.tag, filename, @@ -551,4 +687,4 @@ def extract_from_xml_files( return extracted_data_list - # end method definition + # end method definition diff --git a/pyxecm/otac.py b/pyxecm/otac.py index 5b4b0ac..b0e2c57 100644 --- a/pyxecm/otac.py +++ b/pyxecm/otac.py @@ -8,10 +8,21 @@ config : returns config data set hostname: returns the Archive Center hostname set_hostname: sets the Archive Center hostname +credentials: Get credentials (username + password) +set_credentials: Set the credentials for Archive Center for the "ds" and "admin" users +base_url: Returns the Archive Center base URL +exec_url: Returns the Archive Center URL to execute commands +request_form_header: Deliver the FORM request header used for the SOAP calls. +request_json_header: Deliver the JSON request header used for the CRUD REST API calls. +parse_request_response: Converts the text property of a request response object to a + Python dict in a safe way that also handles exceptions. +authenticate: Authenticates at Archive Center and retrieve Ticket +authenticate_soap: Authenticate via SOAP with admin User exec_command: exec a command on Archive Center put_cert: put Certificate on Archive Center -enable_cert: enables Certitificate on Archive Center - +enable_cert: enables Certitificate on Archive Center via SOAP +enable_certificate: Enable a certificate via the new REST API + (replacing the old SOAP interface) """ __author__ = "Dr. 
Marc Diefenbruch" @@ -23,6 +34,7 @@ import logging import os import base64 +import json import requests from suds.client import Client @@ -30,14 +42,21 @@ logger = logging.getLogger("pyxecm.otac") -requestHeaders = {"Content-Type": "application/x-www-form-urlencoded"} +REQUEST_FORM_HEADERS = {"Content-Type": "application/x-www-form-urlencoded"} + +REQUEST_JSON_HEADERS = { + "accept": "application/json;charset=utf-8", + "Content-Type": "application/json", +} +REQUEST_TIMEOUT = 60 class OTAC: """Used to automate stettings in OpenText Archive Center.""" _config = None _soap_token: str = "" + _otac_ticket = None def __init__( self, @@ -48,6 +67,7 @@ def __init__( ds_password: str, admin_username: str, admin_password: str, + otds_ticket: str | None = None, ): """Initialize the OTAC object @@ -104,8 +124,12 @@ def __init__( otac_exec_url = otac_base_url + "/archive/admin/exec" otac_config["execUrl"] = otac_exec_url otac_config["baseUrl"] = otac_base_url + otac_config["restUrl"] = otac_base_url + "/ot-admin/rest" + otac_config["certUrl"] = otac_config["restUrl"] + "/keystore/cert/status" + otac_config["authenticationUrl"] = otac_config["restUrl"] + "/auth/users/login" self._config = otac_config + self._otac_ticket = otds_ticket def config(self) -> dict: """Returns the configuration dictionary @@ -131,6 +155,17 @@ def set_hostname(self, hostname: str): """ self.config()["hostname"] = hostname + def credentials(self) -> dict: + """Get credentials (username + password) + + Returns: + dict: dictionary with username and password + """ + return { + "username": self.config()["admin_username"], + "password": self.config()["admin_password"], + } + def set_credentials( self, ds_username: str = "", @@ -182,7 +217,162 @@ def exec_url(self) -> str: """ return self.config()["execUrl"] - def _soap_login(self): + def request_form_header(self) -> dict: + """Deliver the FORM request header used for the SOAP calls. + Consists of Token + Form Headers (see global variable) + + Args: + None. + Return: + dict: request header values + """ + + # create union of two dicts: cookie and headers + # (with Python 3.9 this would be easier with the "|" operator) + request_header = {} + request_header.update("token" + self._otac_ticket) + request_header.update(REQUEST_FORM_HEADERS) + + return request_header + + # end method definition + + def request_json_header(self) -> dict: + """Deliver the JSON request header used for the CRUD REST API calls. + Consists of Cookie + JSON Headers (see global variable) + + Args: + None. + Return: + dict: request header values + """ + + if not self._otac_ticket: + self.authenticate(revalidate=True) + + # create union of two dicts: cookie and headers + # (with Python 3.9 this would be easier with the "|" operator) + request_header = {} + request_header["Authorization"] = "token " + self._otac_ticket + request_header.update(REQUEST_JSON_HEADERS) + + return request_header + + # end method definition + + def parse_request_response( + self, + response_object: object, + additional_error_message: str = "", + show_error: bool = True, + ) -> dict | None: + """Converts the text property of a request response object to a + Python dict in a safe way that also handles exceptions. 
+
+        Args:
+            response_object (object): this is the response object delivered by the request call
+            additional_error_message (str): print a custom error message
+            show_error (bool): if True log an error, if False log a warning
+
+        Returns:
+            dict: response or None in case of an error
+        """
+
+        if not response_object:
+            return None
+
+        try:
+            dict_object = json.loads(response_object.text)
+        except json.JSONDecodeError as exception:
+            if additional_error_message:
+                message = "Cannot decode response as JSON. {}; error -> {}".format(
+                    additional_error_message, exception
+                )
+            else:
+                message = "Cannot decode response as JSON; error -> {}".format(
+                    exception
+                )
+            if show_error:
+                logger.error(message)
+            else:
+                logger.debug(message)
+            return None
+        else:
+            return dict_object
+
+    # end method definition
+
+    def authenticate(self, revalidate: bool = False) -> str | None:
+        """Authenticates at Archive Center and retrieve Ticket.
+
+        Args:
+            revalidate (bool, optional): determines if a re-authentication is enforced
+                                         (e.g. if the session has timed out with a 401 error)
+        Returns:
+            str | None: Ticket or None in case of an error.
+                        Also stores the ticket in self._otac_ticket.
+        """
+
+        # Already authenticated and session still valid?
+        if self._otac_ticket and not revalidate:
+            logger.debug(
+                "Session still valid - return existing ticket -> %s",
+                str(self._otac_ticket),
+            )
+            return self._otac_ticket
+
+        otac_ticket = None
+
+        request_url = self.config()["authenticationUrl"]
+
+        # Do the normal username + password authentication:
+        logger.debug(
+            "Requesting OTAC ticket with User/Password; calling -> %s",
+            request_url,
+        )
+
+        response = None
+        try:
+            response = requests.post(
+                url=request_url,
+                data=json.dumps(
+                    self.credentials()
+                ),  # this includes username + password
+                headers=REQUEST_JSON_HEADERS,
+                timeout=REQUEST_TIMEOUT,
+            )
+        except requests.exceptions.RequestException as exception:
+            logger.warning(
+                "Unable to connect to -> %s; error -> %s",
+                request_url,
+                exception,
+            )
+            logger.warning("OTAC service may not be ready yet.")
+            return None
+
+        if response.ok:
+            authenticate_list = self.parse_request_response(
+                response, "This can be normal during restart", False
+            )
+            if not authenticate_list:
+                return None
+            else:
+                authenticate_dict = authenticate_list[1]
+                otac_ticket = authenticate_dict["TOKEN"]
+                logger.debug("Ticket -> %s", otac_ticket)
+        else:
+            logger.error("Failed to request an OTAC ticket; error -> %s", response.text)
+            return None
+
+        # Store authentication ticket:
+        self._otac_ticket = otac_ticket
+
+        return self._otac_ticket
+
+    # end method definition
+
+    def authenticate_soap(self) -> str:
         """Authenticate via SOAP with admin User

         Args:
@@ -202,13 +392,13 @@

     # end method definition

-    def exec_command(self, command: str):
+    def exec_command(self, command: str) -> dict:
         """Execute a command on Archive Center

         Args:
             command (str): command to execute
         Returns:
-            _type_: _description_
+            dict: Response of the HTTP request.
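+
+        Example:
+            A (hypothetical) invocation like exec_command("reload_config")
+            posts the command to the Archive Center exec URL and returns
+            the HTTP response.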
""" payload = { @@ -225,7 +415,7 @@ def exec_command(self, command: str): request_url, ) response = requests.post( - url=request_url, data=payload, headers=requestHeaders, timeout=None + url=request_url, data=payload, headers=REQUEST_FORM_HEADERS, timeout=None ) if not response.ok: logger.error( @@ -245,7 +435,7 @@ def put_cert( cert_path: str, permissions: str = "rcud", ): - """Put Certificate on Archive Center + """Put Certificate on Archive Center via SOAP Call Args: auth_id (str): ID of Certification @@ -259,7 +449,7 @@ def put_cert( # Check if the photo file exists if not os.path.isfile(cert_path): - logger.error("Certificate file -> %s not found!", cert_path) + logger.error("Certificate file -> '%s' not found!", cert_path) return None with open(file=cert_path, mode="r", encoding="utf-8") as cert_file: @@ -268,14 +458,16 @@ def put_cert( # Check that we have the pem certificate file - this is what OTAC expects. # If the file content is base64 encoded we will decode it if "BEGIN CERTIFICATE" in cert_content: - logger.info("Certificate file -> %s is not base64 encoded", cert_path) + logger.debug("Certificate file -> '%s' is not base64 encoded", cert_path) elif "BEGIN CERTIFICATE" in base64.b64decode( cert_content, validate=True ).decode("utf-8"): - logger.info("Certificate file -> %s is base64 encoded", cert_path) + logger.debug("Certificate file -> '%s' is base64 encoded", cert_path) cert_content = base64.b64decode(cert_content, validate=True).decode("utf-8") else: - logger.error("Certificate file -> %s is not in the right format", cert_path) + logger.error( + "Certificate file -> '%s' is not in the right format", cert_path + ) return None request_url = ( @@ -287,14 +479,17 @@ def put_cert( + "&permissions=" + permissions ) - logger.info( - "Putting certificate -> %s on Archive -> %s; calling -> %s", + logger.debug( + "Putting certificate -> '%s' on Archive -> '%s'; calling -> %s", cert_path, logical_archive, request_url, ) response = requests.put( - url=request_url, data=cert_content, headers=requestHeaders, timeout=None + url=request_url, + data=cert_content, + headers=REQUEST_FORM_HEADERS, + timeout=None, ) if not response.ok: @@ -302,7 +497,7 @@ def put_cert( ' %s on Archive -> %s; error -> %s", + "Failed to put certificate -> '%s' on Archive -> '%s'; error -> %s", cert_path, logical_archive, message, @@ -312,19 +507,21 @@ def put_cert( # end method definition - def enable_cert(self, auth_id: str, logical_archive: str, enable: bool = True): - """Enables Certitificate on Archive Center + def enable_cert( + self, auth_id: str, logical_archive: str, enable: bool = True + ) -> bool: + """Enables Certitificate on Archive Center via SOAP call Args: auth_id (str): Client ID logical_archive (str): Archive ID enable (bool, optional): Enable or Disable certificate. Defaults to True. Returns: - response or None if request fails. + True if certificate has been activated, False if an error has occured. """ if not self._soap_token: - self._soap_login() + self.authenticate_soap() if enable: enabled: int = 1 @@ -347,15 +544,104 @@ def enable_cert(self, auth_id: str, logical_archive: str, enable: bool = True): {"key": "CERT_FLAGS", "data": enabled}, ], ) - return response + # With SOAP, no response is a good response! 
+ if not response: + logger.debug("Archive Center certificate has been activated.") + return True + elif response.code == 500: + logger.error( + "Failed to activate Archive Center certificate for Client -> %s on Archive -> '%s'!", + auth_id, + logical_archive, + ) + return False except WebFault as exception: logger.error( - "Failed to execute SetCertificateFlags for Client -> %s on Archive -> %s; error -> %s", + "Failed to execute SetCertificateFlags for Client -> %s on Archive -> '%s'; error -> %s", auth_id, logical_archive, exception, ) - return None + return False + + # end method definition + + def enable_certificate( + self, cert_name: str, cert_type: str, logical_archive: str | None = None + ) -> dict | None: + """Enable a certificate via the new REST API (replacing the old SOAP interface) + + Args: + cert_name (str): Name of the certificate + cert_type (str): Type of the certificate + logical_archive (str, optional): Logical archive name. If empty it is a global certificate + for all logical archives in Archive Center. + + Returns: + dict | None: REST response or None if the request fails + + Example response: + { + 'IDNO': '3', + 'CERT_NAME': 'SP_otcs-admin-0', + 'IMPORT_TIMESTAMP': '1714092017', + 'CERT_TYPE': 'ARC', + 'ASSIGNED_ARCHIVE': None, + 'FINGER_PRINT': 'B9F5 AF66 7CE6 C613 2B3C CAEE 96B6 4F79 97BB 5470 ', + 'ENABLED': True, + 'CERTIFICATE': '...', + 'PRIVILEGES': {'read': True, 'create': True, 'update': True, 'delete': True} + } + """ + + request_url = ( + self.config()["certUrl"] + + "?cert_name=" + + cert_name + + "&cert_type=" + + cert_type + ) + if logical_archive: + request_url += "&assigned_archive=" + logical_archive + + request_header = self.request_json_header() + + payload = {"ENABLED": True} + + logger.debug( + "Enabling certificate -> '%s' of type -> '%s' to Archive Center; calling -> %s", + cert_name, + cert_type, + request_url, + ) + + retries = 0 + while True: + response = requests.put( + url=request_url, + headers=request_header, + data=json.dumps(payload), + timeout=REQUEST_TIMEOUT, + ) + if response.ok: + logger.debug( + "Certificate -> '%s' has been enabled on Archive Center keystore", + cert_name, + ) + return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + retries += 1 + else: + logger.error( + "Failed to enable certificate -> '%s' in Archive Center; status -> %s; error -> %s", + cert_name, + response.status_code, + response.text, + ) + return None # end method definition diff --git a/pyxecm/otcs.py b/pyxecm/otcs.py index 4ad99a1..abbce94 100644 --- a/pyxecm/otcs.py +++ b/pyxecm/otcs.py @@ -3,19 +3,25 @@ such as Users, Groups, Nodes, Workspaces, ... 
Class: OTCS + +Class Methods: + +date_is_newer: Compare two dates, typically create or modification dates + Methods: -__init__ : class initializer -config : returns config data set -cookie : returns cookie information +__init__: class initializer +config: returns config data set +cookie: returns cookie information +otcs_ticket: Return the OTCS ticket credentials: Get credentials (username and password) set_credentials: Set new credentials hostname: Get the configured OTCS hostname set_hostname: Set the hostname of OTCS -base_url : Get OTCS base URL +base_url: Get OTCS base URL cs_url: Get the Extended ECM (OTCS) URL -rest_url : Get OTCS REST base URL - +rest_url: Get OTCS REST base URL +get_data: Get the Data object that holds all loaded Content Server items (see method load_items()) request_form_header: Deliver the request header used for the CRUD REST API calls. request_json_header: Deliver the request header for REST calls that require content type application/json. request_download_header: Deliver the request header used for download REST API calls. @@ -62,13 +68,17 @@ get_node_from_nickname: Get a node based on the nickname set_node_nickname: Assign a nickname to an Extended ECM node (e.g. workspace) get_subnodes: get children nodes of a parent node +lookup_node: lookup the node under a parent node that has a specified value in a category attribute. +get_node_columns: get custom columns configured / enabled for a node. get_node_actions: get possible actions for a node rename_node: Change the name and description of a node +delete_node: Delete a node get_volumes: Get all Volumes get_volume: Get Volume information based on the volume type ID check_node_name: Check if a a node name in a parent location has a name collision + upload_file_to_volume: Fetch a file from a URL or local filesystem and upload - it to a Extended ECM volume + it to an Extended ECM volume upload_file_to_parent: Upload a document to a parent folder add_document_version: Add a version to an Extended ECM document get_latest_document_version: Get latest version of a document node based on the node ID. @@ -98,13 +108,18 @@ get_workspace: Get a workspace node get_workspace_instances: Get all instances of a given workspace type get_workspace_by_type_and_name: Lookup workspace based on workspace type name and workspace name +get_workspace_type_location: Determine the folder in which the workspace instances of a given type reside. + Either the type ID or the type name need to be provided. get_workspace_by_business_object: Lookup workspace based by an business object of an external system +set_workspace_reference: Set reference of workspace to a business object in an external system create_workspace: Create a new business workspace +update_workspace: Update the metadata of a workspace create_workspace_relationship: Create a relationship between two workspaces get_workspace_relationships: get a list of related workspaces get_workspace_roles: Get the Workspace roles -add_member_to_workspace: Add member to workspace role. Check that the user is not yet a member -remove_member_from_workspace: Remove member from workspace role +add_workspace_member: Add member to workspace role. Check that the user is not yet a member +remove_workspace_member: Remove member from workspace role +remove_workspace_members: Remove all members from a workspace role. Check that the user is currently a member. 
assign_workspace_permissions: Update workspace permissions for a given role
update_workspace_icon: Update a workspace with a with a new icon (which is uploaded)

@@ -114,6 +129,8 @@
 update_item: Update an item in Extended ECM (e.g. folder or URL item)
 get_document_templates: Get all document templates for a given target location
 create_document_from_template: Create a document based on a document template
+create_wiki: Create an Extended ECM Wiki.
+create_wiki_page: Create an Extended ECM wiki page.

 get_web_report_parameters: Get parameters of a Web Report
 run_web_report: Run a Web Report that is identified by its nick name
@@ -129,9 +146,18 @@
 get_node_categories: Get categories assigned to a node
 get_node_category: Get a specific category assigned to a node
 get_node_category_ids: Get list of all category definition IDs that are assign to the node.
+get_node_category_names: Get list of all category names that are assigned to the node.
 get_node_category_definition: Get category definition (category id and attribute IDs and types)
 assign_category: Assign a category to a node
+get_category_value_by_name: Lookup the value of an attribute if category name,
+                            set name and attribute name are known.
+get_category_value: Lookup the value of an attribute if category ID, set ID and attribute ID
+                    are known. If you only have the names use get_category_value_by_name()
 set_category_value: Set a value for a specific category attribute to a node
+set_category_values: Set values of a category. Categories can have sets (groupings), multi-line sets (matrix),
+                     and multi-value attributes (list of values). This method supports all variants.
+set_category_inheritance: Set if we want a container item (e.g. a folder or workspace) to inherit
+                          categories to sub-items.
 assign_classification: Assign a classification to an item
 assign_rm_classification: Assign a Records management classification to an item
@@ -159,6 +185,13 @@
 check_workspace_aviator: Check if Content Aviator is enabled for a workspace
 update_workspace_aviator: Enable or disable the Content Aviator for a workspace

+volume_translator: Experimental code to translate the item names and item descriptions in a given hierarchy.
+                   The actual translation is done by a translator object. This recursive method just
+                   traverses the hierarchy and calls the translate() method of the translator object.
+
+download_document_multi_threading: Multi-threading variant of download_document()
+load_items: Create a Pandas Data Frame by traversing a given Content Server hierarchy and collecting
+            workspace and document items.
 """

__author__ = "Dr. Marc Diefenbruch"
Marc Diefenbruch" @@ -172,10 +205,13 @@ import json import time import urllib.parse +import threading +import mimetypes from datetime import datetime import zipfile import requests from pyxecm.helper.xml import XML +from pyxecm.helper.data import Data logger = logging.getLogger("pyxecm.otcs") @@ -206,13 +242,67 @@ class OTCS: _cookie = None _otcs_ticket = None _otds_ticket = None + _data: Data = None + _thread_number = 3 + _download_dir = "" + + @classmethod + def date_is_newer(cls, date_old: str, date_new: str) -> bool: + """Compare two dates, typically create or modification dates + + Args: + date_old (str): the date that is considered older + date_new (str): the date that is considered newer + + Returns: + bool: True if date_new is indeed newer as date_old, False otherwise + """ + + if not date_old or not date_new: + return True + + # Define the date formats + format1 = "%Y-%m-%dT%H:%M:%SZ" # Format: "YYYY-MM-DDTHH:MM:SSZ" + format2 = "%Y-%m-%d %H:%M:%S" # Format: "YYY-MM-DD HH:MM:SS" + format3 = "%Y-%m-%dT%H:%M:%S" # Format: "YYY-MM-DD HH:MM:SS" + format4 = "%Y-%m-%d" # Format: "YYY-MM-DD" + + # Parse the dates + try: + if "T" in date_old and "Z" in date_old: + old_date = datetime.strptime(date_old, format1) + elif " " in date_old: + old_date = datetime.strptime(date_old, format2) + elif "T" in date_old: + old_date = datetime.strptime(date_old, format3) + else: + old_date = datetime.strptime(date_old, format4) + except ValueError: + return True + + try: + if "T" in date_new and "Z" in date_new: + new_date = datetime.strptime(date_new, format1) + elif " " in date_new: + new_date = datetime.strptime(date_new, format2) + elif "T" in date_new: + new_date = datetime.strptime(date_new, format3) + else: + new_date = datetime.strptime(date_new, format4) + except ValueError: + return True + + # Compare the dates + return new_date > old_date + + # end method definition def __init__( self, protocol: str, hostname: str, port: int, - public_url: str, + public_url: str | None = None, username: str | None = None, password: str | None = None, user_partition: str = "Content Server Members", @@ -220,6 +310,8 @@ def __init__( default_license: str = "X3", otds_ticket: str | None = None, base_path: str = "/cs/cs", + thread_number: int = 3, + download_dir: str = "/tmp/contentserver", ): """Initialize the OTCS object @@ -287,6 +379,10 @@ def __init__( otcs_config["baseUrl"] = otcs_base_url otcs_support_url = otcs_base_url + "/cssupport" otcs_config["supportUrl"] = otcs_support_url + + if public_url is None: + public_url = otcs_base_url + otcs_public_support_url = public_url + "/cssupport" otcs_config["supportPublicUrl"] = otcs_public_support_url @@ -348,6 +444,12 @@ def __init__( self._config = otcs_config self._otds_ticket = otds_ticket + self._data = Data() + self._thread_number = thread_number + self._download_dir = download_dir + self._semaphore = threading.BoundedSemaphore(value=thread_number) + + # end method definition def config(self) -> dict: """Returns the configuration dictionary @@ -357,6 +459,8 @@ def config(self) -> dict: """ return self._config + # end method definition + def cookie(self) -> dict: """Returns the login cookie of Extended ECM. 
@@ -366,6 +470,19 @@ def cookie(self) -> dict: """ return self._cookie + # end method definition + + def otcs_ticket(self) -> str | None: + """Return the OTCS ticket + + Returns: + str: String with the OTCS ticket + """ + + return self._otcs_ticket + + # end method definition + def credentials(self) -> dict: """Get credentials (username + password) @@ -377,16 +494,21 @@ "password": self.config()["password"], } + # end method definition + def set_credentials(self, username: str = "admin", password: str = ""): - """Set the credentials for Extended ECM for the based on user name and password. + """Set the credentials for Extended ECM based on username and password. Args: username (str, optional): Username. Defaults to "admin". password (str, optional): Password of the user. Defaults to "". """ + self.config()["username"] = username self.config()["password"] = password + # end method definition + def hostname(self) -> str: """Returns the hostname of Extended ECM (e.g. "otcs") @@ -395,6 +517,8 @@ """ return self.config()["hostname"] + # end method definition + def set_hostname(self, hostname: str): """Sets the hostname of Extended ECM @@ -403,6 +527,8 @@ """ self.config()["hostname"] = hostname + # end method definition + def base_url(self) -> str: """Returns the base URL of Extended ECM @@ -411,6 +537,8 @@ """ return self.config()["baseUrl"] + # end method definition + def cs_url(self) -> str: """Returns the Extended ECM URL @@ -419,6 +547,8 @@ """ return self.config()["csUrl"] + # end method definition + def cs_public_url(self) -> str: """Returns the public (external) Extended ECM URL (incl. base_path /cs/cs ) @@ -427,6 +557,8 @@ """ return self.config()["csPublicUrl"] + # end method definition + def cs_support_url(self) -> str: """Returns the Extended ECM Support URL @@ -435,6 +567,8 @@ """ return self.config()["supportUrl"] + # end method definition + def cs_support_public_url(self) -> str: """Returns the Extended ECM Public Support URL @@ -443,6 +577,8 @@ """ return self.config()["supportPublicUrl"] + # end method definition + def rest_url(self) -> str: """Returns the REST URL of Extended ECM @@ -451,6 +587,19 @@ """ return self.config()["restUrl"] + # end method definition + + def get_data(self) -> Data: + """Get the Data object that holds all loaded Content Server items (see method load_items()) + + Returns: + Data: Data structure with all processed items. + """ + + return self._data + + # end method definition +
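# Editorial sketch (not part of the diff; the printed URL is an assumption
# derived from the config keys above, not a value taken from this module):
#
#     print(otcs.rest_url())  # e.g. "https://otcs.example.com/cs/cs/api"
#     data = otcs.get_data()  # pyxecm.helper.data.Data object filled by load_items()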
def request_form_header(self) -> dict: """Deliver the request header used for the CRUD REST API calls. Consists of Cookie + Form Headers (see global variable) @@ -550,7 +699,7 @@ def parse_request_response( if show_error: logger.error(message) else: - logger.warning(message) + logger.debug(message) return None else: return dict_object @@ -679,7 +828,9 @@ def exist_result_item( # data is a dict - we don't need index value: if property_name and not property_name in data: logger.error( - "There's no -> %s dictionary in data -> %s", property_name, data + "There's no dictionary -> '%s' in data -> %s", + property_name, + data, ) return False properties = data[property_name] @@ -704,7 +855,7 @@ for item in data: if property_name and not property_name in item: logger.error( - "There's no -> %s dictionary in the data list item -> %s", + "There's no dictionary -> '%s' in the data list item -> %s", property_name, item, ) @@ -762,6 +913,7 @@ def get_result_value( key: str, index: int = 0, property_name: str = "properties", + show_error: bool = True, ) -> str | None: """Read an item value from the REST API response. This is considering the most typical structures delivered by V2 REST API of Extended ECM. @@ -780,15 +932,16 @@ # First do some sanity checks: if not response: - logger.info("Empty REST response - returning None") + logger.debug("Empty REST response - returning None") return None if not "results" in response: - logger.error("No 'results' key in REST response - returning None") + if show_error: + logger.error("No 'results' key in REST response - returning None") return None results = response["results"] if not results: - logger.info("No results found!") + logger.debug("No results found!") return None # check if results is a list or a dict (both is possible - dependent on the actual REST API): @@ -815,7 +968,8 @@ # For nearly all OTCS REST Calls perperties is a dict: if isinstance(properties, dict): if not key in properties: - logger.error("Key -> %s is not in result properties!", key) + if show_error: + logger.error("Key -> '%s' is not in result properties!", key) return None return properties[key] # but there are some strange ones that have other names for @@ -834,7 +988,7 @@ "Properties needs to be a list or dict but it is -> %s", str(type(properties)), ) - return False + return None elif isinstance(results, list): # result is a list - we need a valid index: if index > len(results) - 1: @@ -862,7 +1016,8 @@ properties, ) if not key in properties: - logger.error("Key -> %s is not in result properties!", key) + if show_error: + logger.error("Key -> '%s' is not in result properties!", key) return None return properties[key] else: @@ -873,6 +1028,85 @@ # end method definition +
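# Editorial sketch (not part of the diff; the response dict mimics the V2
# REST structures that get_result_value() handles above):
#
#     response = {"results": {"data": [{"properties": {"id": 2000, "name": "Enterprise"}}]}}
#     otcs.get_result_value(response, "id")                      # -> 2000
#     otcs.get_result_value(response, "size", show_error=False)  # -> None, without an error log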
+ """ + + # First do some sanity checks: + if not response: + logger.debug("Empty REST response - returning None") + return None + if not "results" in response: + logger.error("No 'results' key in REST response - returning None") + return None + + results = response["results"] + if not results: + logger.debug("No results found!") + return None + + # check if results is a list or a dict (both is possible - dependent on the actual REST API): + if isinstance(results, dict): + # result is a dict - we don't need index value + + # this is a special treatment for the businessworkspaces REST API - it returns + # for "Create business workspace" the ID directly in the results dict (without data substructure) + if key in results: + return [results[key]] + data = results["data"] + if isinstance(data, dict): + # data is a dict - we don't need index value: + properties = data[property_name] + elif isinstance(data, list): + # data is a list - this has typically just one item, so we use 0 as index + properties = data[0][property_name] + else: + logger.error( + "Data needs to be a list or dict but it is -> %s", str(type(data)) + ) + return None + logger.debug("Properties of results (dict) -> %s", str(properties)) + # For nearly all OTCS REST Calls properties is a dict: + if isinstance(properties, dict): + if not key in properties: + logger.error("Key -> '%s' is not in result properties!", key) + return None + return [properties[key]] + # but there are some strange ones that have other names for + # properties and may use a list - see e.g. /v2/holds + elif isinstance(properties, list): + return [item[key] for item in properties] + else: + logger.error( + "Properties needs to be a list or dict but it is -> %s", + str(type(properties)), + ) + return None + elif isinstance(results, list): + return [item["data"][property_name][key] for item in results] + else: + logger.error( + "Result needs to be a list or dict but it is -> %s", str(type(results)) + ) + return None + + # end method definition + def is_configured(self) -> bool: """Checks if the Content Server pod is configured to receive requests. 
@@ -884,7 +1118,7 @@ def is_configured(self) -> bool: request_url = self.config()["configuredUrl"] - logger.info("Trying to retrieve OTCS URL -> %s", request_url) + logger.debug("Trying to retrieve OTCS URL -> %s", request_url) try: response = requests.get( @@ -893,16 +1127,15 @@ def is_configured(self) -> bool: timeout=REQUEST_TIMEOUT, ) except requests.exceptions.RequestException as exception: - logger.warning( + logger.debug( "Unable to connect to -> %s; warning -> %s", request_url, - exception.strerror, + str(exception), ) - logger.warning("OTCS service may not be ready yet.") return False if not response.ok: - logger.warning( + logger.debug( "Unable to connect to -> %s; status -> %s; warning -> %s", request_url, response.status_code, @@ -925,7 +1158,7 @@ def is_ready(self) -> bool: request_url = self.config()["isReady"] - logger.info("Trying to retrieve OTCS URL -> %s", request_url) + logger.debug("Trying to retrieve OTCS URL -> %s", request_url) try: response = requests.get( @@ -934,16 +1167,15 @@ def is_ready(self) -> bool: timeout=2, ) except requests.exceptions.RequestException as exception: - logger.warning( + logger.debug( "Unable to connect to -> %s; warning -> %s", request_url, - exception.strerror, + str(exception), ) - logger.warning("OTCS service may not be ready yet.") return False if not response.status_code == 200: - logger.warning( + logger.debug( "Unable to connect to -> %s; status -> %s; warning -> %s", request_url, response.status_code, @@ -970,7 +1202,7 @@ def authenticate(self, revalidate: bool = False) -> dict | None: # Already authenticated and session still valid? if self._cookie and not revalidate: - logger.info( + logger.debug( "Session still valid - return existing cookie -> %s", str(self._cookie), ) @@ -978,17 +1210,17 @@ def authenticate(self, revalidate: bool = False) -> dict | None: otcs_ticket = None - logger.info("Wait for OTCS to be ready...") + logger.debug("Wait for OTCS to be ready...") while not self.is_ready(): - logger.warning( - "OTCS is not ready to receive requests yet. Waiting 30 seconds..." + logger.debug( + "OTCS is not ready to receive requests yet. Waiting additional 30 seconds..." ) time.sleep(30) request_url = self.config()["authenticationUrl"] if self._otds_ticket and not revalidate: - logger.info( + logger.debug( "Requesting OTCS ticket with OTDS ticket; calling -> %s", request_url, ) @@ -1015,7 +1247,7 @@ def authenticate(self, revalidate: bool = False) -> dict | None: # Check if previous authentication was not successful. 
# Then we do the normal username + password authentication: if not otcs_ticket: - logger.info( + logger.debug( "Requesting OTCS ticket with User/Password; calling -> %s", request_url, ) @@ -1045,7 +1277,7 @@ def authenticate(self, revalidate: bool = False) -> dict | None: return None else: otcs_ticket = authenticate_dict["ticket"] - logger.info("Ticket -> %s", otcs_ticket) + logger.debug("Ticket -> %s", otcs_ticket) else: logger.error( "Failed to request an OTCS ticket; error -> %s", response.text @@ -1107,7 +1339,7 @@ def get_server_info(self) -> dict | None: request_url = self.config()["serverInfoUrl"] request_header = self._cookie - logger.info( + logger.debug( "Retrieve Extended ECM server information; calling -> %s", request_url ) @@ -1123,7 +1355,7 @@ def get_server_info(self) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1172,7 +1404,7 @@ def apply_config(self, xml_file_path: str) -> dict | None: if not os.path.exists(xml_file_path): logger.error( - "The admin settings file -> %s does not exist in path -> %s!", + "The admin settings file -> '%s' does not exist in path -> '%s'!", filename, os.path.dirname(xml_file_path), ) @@ -1185,8 +1417,8 @@ def apply_config(self, xml_file_path: str) -> dict | None: request_url = self.config()["importSettingsUrl"] request_header = self._cookie - logger.info( - "Applying admin settings from file -> %s; calling -> %s", + logger.debug( + "Applying admin settings from file -> '%s'; calling -> %s", xml_file_path, request_url, ) @@ -1202,12 +1434,12 @@ def apply_config(self, xml_file_path: str) -> dict | None: ) if response.ok: logger.debug( - "Admin settings in file -> %s have been applied", xml_file_path + "Admin settings in file -> '%s' have been applied", xml_file_path ) return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1222,10 +1454,10 @@ def apply_config(self, xml_file_path: str) -> dict | None: # end method definition def get_user(self, name: str, show_error: bool = False) -> dict | None: - """Lookup Extended ECM user based on the name. + """Lookup Extended ECM user based on the login name. Args: - name (str): name of the user + name (str): name of the user (login) show_error (bool): treat as error if user is not found Returns: dict: User information or None if the user is not found. @@ -1283,12 +1515,15 @@ def get_user(self, name: str, show_error: bool = False) -> dict | None: # Add query parameters (these are NOT passed via JSon body!) 
# type = 0 ==> User - request_url = self.config()["membersUrlv2"] + "?where_type=0&query={}".format( - name - ) + query = {"where_type": 0, "where_name": name} + encoded_query = urllib.parse.urlencode(query, doseq=True) + request_url = self.config()["membersUrlv2"] + "?{}".format(encoded_query) + request_header = self.request_form_header() - logger.info("Get user with name -> %s; calling -> %s", name, request_url) + logger.debug( + "Get user with login name -> '%s'; calling -> %s", name, request_url + ) retries = 0 while True: @@ -1302,19 +1537,19 @@ def get_user(self, name: str, show_error: bool = False) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: if show_error: logger.error( - "Failed to get user -> %s; status -> %s; error -> %s", + "Failed to get user with login -> '%s'; status -> %s; error -> %s", name, response.status_code, response.text, ) else: - logger.info("User -> %s not found.", name) + logger.debug("User with login -> '%s' not found.", name) return None # end method definition @@ -1372,7 +1607,7 @@ def add_user( request_url = self.config()["membersUrlv2"] request_header = self.request_form_header() - logger.info("Adding user -> %s; calling -> %s", name, request_url) + logger.debug("Adding user -> %s; calling -> %s", name, request_url) retries = 0 while True: @@ -1387,7 +1622,7 @@ def add_user( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1454,7 +1689,7 @@ def search_user(self, value: str, field: str = "where_name") -> dict | None: request_url = self.config()["membersUrlv2"] + "?" 
+ field + "=" + value request_header = self.request_form_header() - logger.info( + logger.debug( "Searching user by field -> %s, value -> %s; calling -> %s", field, value, @@ -1473,7 +1708,7 @@ def search_user(self, value: str, field: str = "where_name") -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1504,7 +1739,7 @@ def update_user(self, user_id: int, field: str, value: str) -> dict | None: request_url = self.config()["membersUrlv2"] + "/" + str(user_id) request_header = self.request_form_header() - logger.info( + logger.debug( "Updating user with ID -> %s, field -> %s, value -> %s; calling -> %s", str(user_id), field, @@ -1526,7 +1761,7 @@ def update_user(self, user_id: int, field: str, value: str) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1555,7 +1790,7 @@ def get_user_profile(self) -> dict | None: request_url = self.config()["membersUrlv2"] + "/preferences" request_header = self.request_form_header() - logger.info( + logger.debug( "Get profile (settings) for current user; calling -> %s", request_url, ) @@ -1573,7 +1808,7 @@ def get_user_profile(self) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1618,7 +1853,7 @@ def update_user_profile( request_url = self.config()["membersUrlv2"] + "/preferences" request_header = self.request_form_header() - logger.info( + logger.debug( "Updating profile for current user, field -> %s, value -> %s; calling -> %s", field, value, @@ -1640,7 +1875,7 @@ def update_user_profile( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1668,7 +1903,7 @@ def update_user_photo(self, user_id: int, photo_id: int) -> dict | None: request_url = self.config()["membersUrl"] + "/" + str(user_id) request_header = self.request_form_header() - logger.info( + logger.debug( "Update user ID -> %s with photo ID -> %s; calling -> %s", user_id, photo_id, @@ -1688,7 +1923,7 @@ def update_user_photo(self, user_id: int, photo_id: int) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ 
-1761,7 +1996,7 @@ def get_user_proxies(self, use_v2: bool = False) -> dict | None: request_url = self.config()["membersUrl"] + "/proxies" request_header = self.request_form_header() - logger.info("Get proxy users for current user; calling -> %s", request_url) + logger.debug("Get proxy users for current user; calling -> %s", request_url) retries = 0 while True: @@ -1775,7 +2010,7 @@ def get_user_proxies(self, use_v2: bool = False) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1823,7 +2058,7 @@ def add_user_proxy( version_number = float(stripped_version) # for versions older than 23.4 we need to use - # the egacy Extended ECM for Government Proxy + # the legacy Extended ECM for Government Proxy # implementation: if version_number >= 23.4: post_dict = {} @@ -1832,7 +2067,7 @@ def add_user_proxy( post_dict["to_date"] = to_date post_data = {"body": json.dumps(post_dict)} request_url = self.config()["membersUrlv2"] + "/proxies" - logger.info( + logger.debug( "Assign proxy user with ID -> %s to current user; calling -> %s", proxy_user_id, request_url, @@ -1845,7 +2080,7 @@ def add_user_proxy( post_dict = {str(proxy_user_id): post_dict} post_data = {"add_proxy": json.dumps(post_dict)} request_url = self.config()["membersUrl"] + "/proxies" - logger.info( + logger.debug( "Assign proxy user with ID -> %s to current user (legacy xGov); calling -> %s", proxy_user_id, request_url, @@ -1867,7 +2102,7 @@ def add_user_proxy( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1893,7 +2128,7 @@ def add_favorite(self, node_id: int) -> dict | None: request_url = self.config()["favoritesUrl"] + "/" + str(node_id) request_header = self.request_form_header() - logger.info( + logger.debug( "Adding favorite for node ID -> %s; calling -> %s", node_id, request_url ) @@ -1909,7 +2144,7 @@ def add_favorite(self, node_id: int) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1938,7 +2173,7 @@ def add_favorite_tab(self, tab_name: str, order: int) -> dict | None: request_url = self.config()["favoritesUrl"] + "/tabs" request_header = self.request_form_header() - logger.info("Adding favorite tab -> %s; calling -> %s", tab_name, request_url) + logger.debug("Adding favorite tab -> %s; calling -> %s", tab_name, request_url) retries = 0 while True: @@ -1953,7 +2188,7 @@ def add_favorite_tab(self, tab_name: str, order: int) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + 
logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1988,12 +2223,13 @@ def get_group(self, name: str, show_error: bool = False) -> dict | None: # Add query parameters (these are NOT passed via JSon body!) # type = 1 ==> Group - request_url = self.config()["membersUrlv2"] + "?where_type=1&query={}".format( - name - ) + query = {"where_type": 1, "where_name": name} + encoded_query = urllib.parse.urlencode(query, doseq=True) + request_url = self.config()["membersUrlv2"] + "?{}".format(encoded_query) + request_header = self.request_form_header() - logger.info("Get group with name -> %s; calling -> %s", name, request_url) + logger.debug("Get group with name -> '%s'; calling -> %s", name, request_url) retries = 0 while True: @@ -2007,7 +2243,7 @@ def get_group(self, name: str, show_error: bool = False) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2019,7 +2255,7 @@ def get_group(self, name: str, show_error: bool = False) -> dict | None: response.text, ) else: - logger.info("Group -> %s not found.", name) + logger.debug("Group -> '%s' not found.", name) return None # end method definition @@ -2038,7 +2274,7 @@ def add_group(self, name: str) -> dict | None: request_url = self.config()["membersUrlv2"] request_header = self.request_form_header() - logger.info("Adding group -> %s; calling -> %s", name, request_url) + logger.debug("Adding group -> %s; calling -> %s", name, request_url) logger.debug("Group Attributes -> %s", str(group_post_body)) retries = 0 @@ -2054,7 +2290,7 @@ def add_group(self, name: str) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2094,7 +2330,7 @@ def get_group_members( ) request_header = self.request_form_header() - logger.info( + logger.debug( "Getting members of group with ID -> %s; calling -> %s", str(group), request_url, @@ -2112,7 +2348,7 @@ def get_group_members( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2141,7 +2377,7 @@ def add_group_member(self, member_id: int, group_id: int) -> dict | None: request_url = self.config()["membersUrlv2"] + "/" + str(group_id) + "/members" request_header = self.request_form_header() - logger.info( + logger.debug( "Adding member with ID -> %s to group with ID -> %s; calling -> %s", str(member_id), str(group_id), @@ -2161,7 +2397,7 @@ def add_group_member(self, member_id: int, group_id: int) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - 
try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2176,81 +2412,54 @@ def add_group_member(self, member_id: int, group_id: int) -> dict | None: # end method definition - def get_node(self, node_id: int, timeout: int = REQUEST_TIMEOUT) -> dict | None: + def get_node( + self, + node_id: int, + fields: ( + str | list + ) = "properties", # per default we just get the most important information + metadata: bool = False, + timeout: int = REQUEST_TIMEOUT, + ) -> dict | None: """Get a node based on the node ID. Args: node_id (int) is the node Id of the node + fields (str | list, optional): Which fields to retrieve. This can have a big impact on performance! + Possible fields: + * "properties" - can further be restricted by adding sub-fields in {...} like "properties{id,name,parent_id,description}" + * "categories" + * "versions" - can further be restricted by adding ".element(0)" to just get the latest version + * "permissions" - canfurther be restricted by adding ".limit(5)" to just get the first 5 permissions + fields can either be a string (to select just one field group) or a list of strings to select multiple groups + metadata (bool, optional): Returns metadata (data type, field length, min/max values, etc.) + about data, which will be returned under results.metadata / + metadata_map / metadata_order timeout (int, optional): timeout for the request in seconds Returns: dict: Node information or None if no node with this ID is found. - "results": [ - { - "data": [ - { - "columns": [ - { - "data_type": 0, - "key": "string", - "name": "string", - "sort_key": "string" - } - ], - "properties": [ - { - "advanced_versioning": true, - "container": true, - "container_size": 0, - "create_date": "string", - "create_user_id": 0, - "description": "string", - "description_multilingual": { - "en": "string", - "de": "string" - }, - "external_create_date": "2019-08-24", - "external_identity": "string", - "external_identity_type": "string", - "external_modify_date": "2019-08-24", - "external_source": "string", - "favorite": true, - "guid": "string", - "hidden": true, - "icon": "string", - "icon_large": "string", - "id": 0, - "modify_date": "2019-08-24", - "modify_user_id": 0, - "name": "string", - "name_multilingual": { - "en": "string", - "de": "string" - }, - "owner": "string", - "owner_group_id": 0, - "owner_user_id": 0, - "parent_id": 0, - "reserved": true, - "reserved_date": "string", - "reserved_user_id": 0, - "status": 0, - "type": 0, - "type_name": "string", - "versionable": true, - "versions_control_advanced": true, - "volume_id": 0 - } - ] - } - ] - } - ] + """ - request_url = self.config()["nodesUrlv2"] + "/" + str(node_id) + query = {} + if fields: + query["fields"] = fields + + encoded_query = urllib.parse.urlencode(query, doseq=True) + + request_url = ( + self.config()["nodesUrlv2"] + + "/" + + str(node_id) + + "?{}".format(encoded_query) + ) + + if metadata: + request_url += "&metadata" + request_header = self.request_form_header() - logger.info("Get node with ID -> %s; calling -> %s", str(node_id), request_url) + logger.debug("Get node with ID -> %s; calling -> %s", str(node_id), request_url) retries = 0 while True: @@ -2265,7 +2474,7 @@ def get_node(self, node_id: int, timeout: int = REQUEST_TIMEOUT) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - 
logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2291,23 +2500,46 @@ def get_node(self, node_id: int, timeout: int = REQUEST_TIMEOUT) -> dict | None: # If it fails after REQUEST_MAX_RETRIES retries we let it wait forever logger.warning("Turn timeouts off and wait forever...") timeout = None + except requests.exceptions.ConnectionError: + if retries <= REQUEST_MAX_RETRIES: + logger.warning( + "Connection error. Retrying in %s seconds...", + str(REQUEST_RETRY_DELAY), + ) + retries += 1 + time.sleep(REQUEST_RETRY_DELAY) # Add a delay before retrying + else: + logger.error( + "Failed to get node with ID -> %s; connection error", + str(node_id), + ) + # If it fails after REQUEST_MAX_RETRIES retries we let it wait forever + logger.warning("Turn timeouts off and wait forever...") + timeout = None + time.sleep(REQUEST_RETRY_DELAY) # Add a delay before retrying + + # end method definition + + def get_node_by_parent_and_name( + self, + parent_id: int, + name: str, + fields: str | list = "properties", + show_error: bool = False, + ) -> dict | None: + """Get a node based on the parent ID and name. This method does basically + a query with "where_name" and the "result" is a list. - # end method definition - - def get_node_by_parent_and_name( - self, - parent_id: int, - name: str, - fields: str = "properties", - show_error: bool = False, - ) -> dict | None: - """Get a node based on the parent ID and name. This method does basically - a query with "where_name" and the "result" is a list. - Args: parent_id (int) is the node Id of the parent node name (str) is the name of the node to get - fields (str, optional): which fields to retrieve. This can have a big impact on performance! + fields (str | list, optional): Which fields to retrieve. This can have a big impact on performance! + Possible fields: + * "properties" - can further be restricted by adding sub-fields in {...} like "properties{id,name,parent_id,description}" + * "categories" + * "versions" - can further be restricted by adding ".element(0)" to just get the latest version + * "permissions" - canfurther be restricted by adding ".limit(5)" to just get the first 5 permissions + fields can either be a string (to select just one field group) or a list of strings to select multiple groups show_error (bool, optional): treat as error if node is not found Returns: dict: Node information or None if no node with this name is found in parent. 
@@ -2328,8 +2560,8 @@ def get_node_by_parent_and_name( ) request_header = self.request_form_header() - logger.info( - "Get node with name -> %s and parent ID -> %s; calling -> %s", + logger.debug( + "Get node with name -> '%s' and parent ID -> %s; calling -> %s", name, str(parent_id), request_url, @@ -2347,21 +2579,21 @@ def get_node_by_parent_and_name( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: if show_error: logger.error( - "Failed to get node with name -> %s and parent ID -> %s; status -> %s; error -> %s", + "Failed to get node with name -> '%s' and parent ID -> %s; status -> %s; error -> %s", name, str(parent_id), response.status_code, response.text, ) else: - logger.info( - "Node with name -> %s and parent ID -> %s not found.", + logger.debug( + "Node with name -> '%s' and parent ID -> %s not found.", name, str(parent_id), ) @@ -2370,7 +2602,11 @@ def get_node_by_parent_and_name( # end method definition def get_node_by_workspace_and_path( - self, workspace_id: int, path: list, show_error: bool = False + self, + workspace_id: int, + path: list, + create_path: bool = False, + show_error: bool = False, ) -> dict | None: """Get a node based on the workspace ID (= node ID) and path (list of folder names). @@ -2378,36 +2614,76 @@ def get_node_by_workspace_and_path( workspace_id (int): node ID of the workspace path (list): list of container items (top down), last item is name of to be retrieved item. If path is empty the node of the volume is returned. + create_path (bool): whether or not missing folders in the path should be created show_error (bool, optional): treat as error if node is not found Returns: dict: Node information or None if no node with this path is found. """ - current_item_id = workspace_id + parent_item_id = workspace_id # in case the path is an empty list # we will have the node of the workspace: - node = self.get_node(current_item_id) + node = self.get_node(parent_item_id) for path_element in path: - node = self.get_node_by_parent_and_name(current_item_id, path_element) + node = self.get_node_by_parent_and_name(parent_item_id, path_element) current_item_id = self.get_result_value(node, "id") if not current_item_id: - if show_error: - logger.error("Cannot find path element -> %s!", path_element) + if create_path: + # create missing path element: + response = self.create_item( + parent_id=parent_item_id, + item_type=str(0), + item_name=path_element, + show_error=False, + ) + # We may have a race condition here - another thread may have created the folder in parallel + if not response: + logger.warning( + "Cannot create folder -> '%s' in workspace with ID -> %s (path -> %s), it may already exist (race condition). 
Try to get it...", + path_element, + workspace_id, + str(path), + ) + response = self.get_node_by_parent_and_name( + parent_id=parent_item_id, + name=path_element, + show_error=True, + ) + if not response: + if show_error: + logger.error( + "Cannot create path element -> %s!", path_element + ) + else: + logger.debug( + "Cannot create path element -> %s.", path_element + ) + return None + # now we set current item ID to the new response: + current_item_id = self.get_result_value(response, "id") + node = response + # end if create_path else: - logger.info("Cannot find path element -> %s.", path_element) - return None + if show_error: + logger.error("Cannot find path element -> %s!", path_element) + else: + logger.debug("Cannot find path element -> %s.", path_element) + return None logger.debug( - "Traversing path element -> %s (%s)", path_element, str(current_item_id) + "Traversing path element -> '%s' (%s)", + path_element, + str(current_item_id), ) + parent_item_id = current_item_id return node # end method definition def get_node_by_volume_and_path( - self, volume_type: int, path: list | None = None + self, volume_type: int, path: list | None = None, create_path: bool = False ) -> dict | None: """Get a node based on the volume and path (list of container items). @@ -2433,6 +2709,7 @@ def get_node_by_volume_and_path( "Business Workspaces" = 862 path (list): list of container items (top down), last item is name of to be retrieved item. If path is empty the node of the volume is returned. + create_path (bool): if path elements are missing: should they be created? Returns: dict: Node information or None if no node with this path is found. """ @@ -2448,7 +2725,7 @@ def get_node_by_volume_and_path( return None volume_id = self.get_result_value(response, "id") - logger.info( + logger.debug( "Volume type -> %s has node ID -> %s", str(volume_type), str(volume_id) ) @@ -2461,15 +2738,22 @@ def get_node_by_volume_and_path( for path_element in path: node = self.get_node_by_parent_and_name(current_item_id, path_element) path_item_id = self.get_result_value(node, "id") + if not path_item_id and create_path: + node = self.create_item( + parent_id=current_item_id, item_type=0, item_name=path_element + ) + path_item_id = self.get_result_value(node, "id") if not path_item_id: logger.error( - "Cannot find path element -> %s in container with ID -> %s.", + "Cannot find path element -> '%s' in container with ID -> %s.", path_element, str(current_item_id), ) return None current_item_id = path_item_id - logger.debug("Traversing path element with ID -> %s", str(current_item_id)) + logger.debug( + "Traversing path element with ID -> '%s'", str(current_item_id) + ) return node @@ -2490,7 +2774,7 @@ def get_node_from_nickname( request_url = self.config()["nicknameUrl"] + "/" + nickname + "/nodes" request_header = self.request_form_header() - logger.info( + logger.debug( "Get node with nickname -> %s; calling -> %s", nickname, request_url ) @@ -2506,7 +2790,7 @@ def get_node_from_nickname( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2518,7 +2802,7 @@ def get_node_from_nickname( response.text, ) else: - logger.info("Node with nickname -> %s not found.", nickname) + logger.debug("Node with nickname -> '%s' 
not found.", nickname) return None def set_node_nickname( @@ -2533,13 +2817,21 @@ def set_node_nickname( dict: Node information or None if no node with this nickname is found. """ + if not nickname: + return None + + nickname = nickname.replace("-", "_") + nickname = nickname.replace(":", "_") + nickname = nickname.replace("/", "_") + nickname = nickname.replace(" ", "_") + nickname_put_body = {"nickname": nickname} request_url = self.config()["nodesUrlv2"] + "/" + str(node_id) + "/nicknames" request_header = self.request_form_header() - logger.info( - "Assign nickname -> %s to node with ID -> %s; calling -> %s", + logger.debug( + "Assign nickname -> '%s' to node with ID -> %s; calling -> %s", nickname, node_id, request_url, @@ -2558,21 +2850,21 @@ def set_node_nickname( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: if show_error: logger.error( - "Failed to assign nickname -> %s to node ID -> %s; status -> %s; error -> %s", + "Failed to assign nickname -> '%s' to node ID -> %s; status -> %s; error -> %s", nickname, node_id, response.status_code, response.text, ) else: - logger.info( - "Cannot assign nickname -> %s to node ID -> %s. Maybe the nickname is already in use or the node does not exist.", + logger.debug( + "Cannot assign nickname -> '%s' to node ID -> %s. Maybe the nickname is already in use or the node does not exist.", nickname, node_id, ) @@ -2588,12 +2880,15 @@ def get_subnodes( show_hidden: bool = False, limit: int = 100, page: int = 1, - fields: str = "properties", # per default we just get the most important information + fields: ( + str | list + ) = "properties", # per default we just get the most important information + metadata: bool = False, ) -> dict | None: """Get a subnodes of a parent node ID. Args: - parent_node_id (int) is the node Id of the node + parent_node_id (int): Node Id of the node filter_node_types (int, optional): -1 get all containers -2 get all searchable objects (default) @@ -2602,9 +2897,81 @@ def get_subnodes( show_hidden (bool, optional): list also hidden items (default = False) limit (int, optional): maximum number of results (default = 100) page (int, optional): number of result page (default = 1 = 1st page) - fields (str): which fields to retrieve. This can have a big impact on performance! + fields (str | list, optional): Which fields to retrieve. This can have a big impact on performance! + Possible fields: + * "properties" - can further be restricted by adding sub-fields in {...} like "properties{id,name,parent_id,description}" + * "categories" + * "versions" - can further be restricted by adding ".element(0)" to just get the latest version + * "permissions" - canfurther be restricted by adding ".limit(5)" to just get the first 5 permissions + fields can either be a string (to select just one field group) or a list of strings to select multiple groups + metadata (bool, optional): Returns metadata (data type, field length, min/max values, etc.) + about data, which will be returned under results.metadata / + metadata_map / metadata_order Returns: dict: Subnodes information or None if no node with this parent ID is found. 
+ Example: + + "results": [ + { + "data": [ + { + "columns": [ + { + "data_type": 0, + "key": "string", + "name": "string", + "sort_key": "string" + } + ], + "properties": [ + { + "advanced_versioning": true, + "container": true, + "container_size": 0, + "create_date": "string", + "create_user_id": 0, + "description": "string", + "description_multilingual": { + "en": "string", + "de": "string" + }, + "external_create_date": "2019-08-24", + "external_identity": "string", + "external_identity_type": "string", + "external_modify_date": "2019-08-24", + "external_source": "string", + "favorite": true, + "guid": "string", + "hidden": true, + "icon": "string", + "icon_large": "string", + "id": 0, + "modify_date": "2019-08-24", + "modify_user_id": 0, + "name": "string", + "name_multilingual": { + "en": "string", + "de": "string" + }, + "owner": "string", + "owner_group_id": 0, + "owner_user_id": 0, + "parent_id": 0, + "reserved": true, + "reserved_date": "string", + "reserved_user_id": 0, + "status": 0, + "type": 0, + "type_name": "string", + "versionable": true, + "versions_control_advanced": true, + "volume_id": 0 + } + ] + } + ] + } + ] """ # Add query parameters (these are NOT passed via JSon body!) @@ -2630,9 +2997,13 @@ def get_subnodes( + "/nodes" + "?{}".format(encoded_query) ) + + if metadata: + request_url += "&metadata" + request_header = self.request_form_header() - logger.info( + logger.debug( "Get subnodes of parent node with ID -> %s; calling -> %s", str(parent_node_id), request_url, @@ -2650,7 +3021,7 @@ def get_subnodes( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2664,15 +3035,212 @@ def get_subnodes( # end method definition + def lookup_node( + self, parent_node_id: int, category: str, attribute: str, value: str + ) -> dict: + """Lookup the node under a parent node that has a specified value in a category attribute. + + Args: + parent_node_id (int): Node ID of the parent (typically folder or workspace) + category (str): name of the category + attribute (str): name of the attribute that includes the value to match with + value (str): given lookup value + + Returns: + dict: Node or None if the REST API fails. + """ + + response = self.get_subnodes( + parent_node_id=parent_node_id, + limit=250, + fields=["properties", "categories"], + metadata=True, + ) + if not response or not response.get("results", None): + return None + + nodes = response["results"] + for node in nodes: + schema = node["metadata"]["categories"] + data = node["data"]["categories"] + for cat_data, cat_schema in zip(data, schema): + + data_values = list(cat_data.values()) + schema_values = list(cat_schema.values()) + # Schema has one additional element (the first one) representing + # the category object itself. This includes the name. We need + # to remove (pop) it from the schema list to make sure the schema list + # and the data list have the same number of items. 
Otherwise + # the following for loop with zip() would not properly align the + # two lists: + category_name = schema_values.pop(0)["name"] + if category_name == category: + for attr_data, attr_schema in zip(data_values, schema_values): + attr_name = attr_schema["name"] + if attr_name == attribute: + if isinstance(attr_data, list): + if value in attr_data: + return node + else: + if value == attr_data: + return node + # we can break here and continue with the next node + # as we had the right category but did not find the matching value + break + + logger.warning( + "Coudn't find a node with the value -> '%s' in the attribute -> '%s' of category -> '%s'.", + value, + attribute, + category, + ) + + return None + + # end method definition + + def get_node_columns(self, node_id: int) -> dict: + """Get custom columns configured / enabled for a node. + + Args: + node_id (int): ID of the Node. + Returns: + dict: Information of the Node columns or None if the request fails. + + Example: + { + 'links': { + 'data': {...} + }, + 'results': { + 'columns_to_display': { + 'global_columns': ['Type', 'Name', 'Size', 'Modified'], + 'inherited_columns': [ + { + 'id': 6270, + 'name': 'Title', + 'locked': False, + 'default': False, + 'has_permission': True, + 'location_id': 6271, + 'displayed': False, + 'location_name': 'Knowledge Base Articles' + }, + { + 'id': 13076, + 'name': 'Published Date', + 'locked': False, + 'default': False, + 'has_permission': True, + 'location_id': 6271, + 'displayed': False, + 'location_name': 'Knowledge Base Articles' + }, + { + 'id': 6248, + 'name': 'Valid To Date', + 'locked': False, + 'default': False, + 'has_permission': True, + 'location_id': 6271, + 'displayed': False, + 'location_name': 'Knowledge Base Articles' + }, + ... + ], + 'local_columns': { + 'available_columns': [ + { + 'id': 13072, + 'name': 'Application', + 'default': False + }, + { + 'id': 6288, + 'name': 'Approved Usage', + 'default': False + }, + { + 'id': 6262, + 'name': 'Business Function', + 'default': False + }, + ... + ], + 'displayed_columns': [...] + } + }, + 'columns_to_sort': { + 'inherited_sort': { + 'column_id': None, + 'column_name': None, + 'sort_direction': None + }, + 'local_sort': { + 'local_sort_column': [ + { + 'value': 13072, + 'name': 'Application', + 'selected': False + }, + { + 'value': 6288, + 'name': 'Approved Usage', + 'selected': False + }, + ... + ], + 'local_sort_order': [...] + } + } + } + } + """ + + request_url = self.config()["nodesUrlv2"] + "/" + str(node_id) + "/columns" + + request_header = self.request_form_header() + + logger.debug( + "Get columns for node with ID -> %s; calling -> %s", + str(node_id), + request_url, + ) + + retries = 0 + while True: + response = requests.get( + url=request_url, + headers=request_header, + cookies=self.cookie(), + timeout=None, + ) + if response.ok: + return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + retries += 1 + else: + logger.error( + "Failed to get actions for node with ID -> %s; status -> %s; error -> %s", + node_id, + response.status_code, + response.text, + ) + return None + + # end method definition + def get_node_actions(self, node_id: int, filter_actions: list = None) -> dict: """Get allowed actions for a node. 
def get_node_actions(self, node_id: int, filter_actions: list = None) -> dict: """Get allowed actions for a node. Args: - node_id (int): _description_ - filter_actions (list, optional): _description_ - + node_id (int): ID of the Node. + filter_actions (list, optional): Optional list of actions to filter for. Returns: - dict: _description_ + dict: Information of the Node actions or None if the request fails. """ actions_post_body = {"ids": [node_id], "actions": filter_actions} @@ -2681,7 +3249,7 @@ def get_node_actions(self, node_id: int, filter_actions: list = None) -> dict: request_header = self.request_form_header() - logger.info( + logger.debug( "Get actions for node with ID -> %s; calling -> %s", str(node_id), request_url, @@ -2700,7 +3268,7 @@ return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2745,7 +3313,7 @@ def rename_node( request_url = self.config()["nodesUrlv2"] + "/" + str(node_id) request_header = self.request_form_header() - logger.info( + logger.debug( "Renaming node with ID -> %s to -> %s; calling -> %s", str(node_id), name, @@ -2765,7 +3333,7 @@ return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2780,6 +3348,48 @@ # end method definition + def delete_node(self, node_id: int): + """Delete an existing node. + + Args: + node_id (int): ID of the node to be deleted + """ + + request_url = self.config()["nodesUrlv2"] + "/" + str(node_id) + request_header = self.request_form_header() + + logger.debug( + "Delete node with ID -> %s; calling -> %s", + str(node_id), + request_url, + ) + + retries = 0 + while True: + response = requests.delete( + url=request_url, + headers=request_header, + cookies=self.cookie(), + timeout=None, + ) + if response.ok: + return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + retries += 1 + else: + logger.error( + "Failed to delete node with ID -> %s; status -> %s; error -> %s", + str(node_id), + response.status_code, + response.text, + ) + return None + + # end method definition + def get_volumes(self) -> dict | None: """Get all Volumes.
@@ -2833,7 +3443,7 @@ def get_volumes(self) -> dict | None: request_url = self.config()["volumeUrl"] request_header = self.request_form_header() - logger.info("Get volumes; calling -> %s", request_url) + logger.debug("Get volumes; calling -> %s", request_url) retries = 0 while True: @@ -2847,7 +3457,7 @@ def get_volumes(self) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2876,7 +3486,7 @@ def get_volume( request_url = self.config()["volumeUrl"] + "/" + str(volume_type) request_header = self.request_form_header() - logger.info( + logger.debug( "Get volume type -> %s; calling -> %s", str(volume_type), request_url ) @@ -2893,7 +3503,7 @@ def get_volume( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2920,6 +3530,23 @@ def get_volume( # If it fails after REQUEST_MAX_RETRIES retries we let it wait forever logger.warning("Turn timeouts off and wait forever...") timeout = None + except requests.exceptions.ConnectionError: + if retries <= REQUEST_MAX_RETRIES: + logger.warning( + "Connection error. Retrying in %s seconds...", + str(REQUEST_RETRY_DELAY), + ) + retries += 1 + time.sleep(REQUEST_RETRY_DELAY) # Add a delay before retrying + else: + logger.error( + "Failed to get volume type -> %s; connection error", + str(volume_type), + ) + # If it fails after REQUEST_MAX_RETRIES retries we let it wait forever + logger.warning("Turn timeouts off and wait forever...") + timeout = None + time.sleep(REQUEST_RETRY_DELAY) # Add a delay before retrying # end method definition @@ -2938,8 +3565,8 @@ def check_node_name(self, parent_id: int, node_name: str) -> dict | None: request_url = self.config()["validationUrl"] request_header = self.request_form_header() - logger.info( - "Check if node with name -> %s can be created in parent with ID -> %s; calling -> %s", + logger.debug( + "Check if node with name -> '%s' can be created in parent with ID -> %s; calling -> %s", node_name, str(parent_id), request_url, @@ -2960,12 +3587,12 @@ def check_node_name(self, parent_id: int, node_name: str) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to check if node name -> %s can be created in parent with ID -> %s; status -> %s; error -> %s", + "Failed to check if node name -> '%s' can be created in parent with ID -> %s; status -> %s; error -> %s", node_name, str(parent_id), response.status_code, @@ -2992,7 +3619,7 @@ def upload_file_to_volume( if package_url.startswith("http"): # Download file from remote location specified by the packageUrl # this must be a public place without authentication: - logger.info("Download transport package from URL -> %s", 
package_url) + logger.debug("Download transport package from URL -> %s", package_url) try: package = requests.get(url=package_url, timeout=1200) @@ -3010,7 +3637,7 @@ logger.error("Request error -> %s", err.strerror) return None - logger.info( + logger.debug( "Successfully downloaded package -> %s; status code -> %s", package_url, package.status_code, @@ -3018,7 +3645,7 @@ file = package.content elif os.path.exists(package_url): - logger.info("Using local package -> %s", package_url) + logger.debug("Using local package -> %s", package_url) file = open(file=package_url, mode="rb") else: @@ -3033,8 +3660,8 @@ self.cookie() ) # for some reason we have to omit the other header parts here - otherwise we get a 500 response - logger.info( - "Uploading package -> %s with mime type -> %s; calling -> %s", + logger.debug( + "Uploading package -> '%s' with mime type -> '%s'; calling -> %s", file_name, mime_type, request_url, @@ -3054,12 +3681,12 @@ return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to upload file -> %s to volume of type -> %s; status -> %s; error -> %s", + "Failed to upload file -> '%s' to volume of type -> %s; status -> %s; error -> %s", package_url, str(volume_type), response.status_code, @@ -3070,7 +3697,16 @@ # end method definition def upload_file_to_parent( - self, file_url: str, file_name: str, mime_type: str, parent_id: int + self, + file_url: str, + file_name: str, + mime_type: str, + parent_id: int, + category_data: dict | None = None, + description: str = "", + external_modify_date: str | None = None, + external_create_date: str | None = None, + show_error: bool = True, ) -> dict | None: """Fetch a file from a URL or local filesystem and upload it to a Content Server parent (folder). @@ -3079,14 +3715,40 @@ file_name (str): name of the file mime_type (str): mimeType of the file parent_id (int): parent (ID) of the file to upload - Returns: - dict: Upload response or None if the upload fails. - """ - + category_data (dict): metadata / category data + Example: category ID = 12508 + { + "12508": { + "12508_2": "Draft", # Text drop-down + "12508_3": 8559, # user ID + "12508_4": "2023-05-10", # date + "12508_6": 7357, # user ID + "12508_7": "2023-05-11", # date + "12508_5": True, # checkbox / bool + "12508_8": "EN", # text drop-down + "12508_9": "MS Word", # text drop-down + } + } + description (str, optional): description of the document + external_create_date (str, optional): value of the source system in format 2024-06-24 + external_modify_date (str, optional): value of the source system in format 2024-06-24 + show_error (bool, optional): treat as error if upload has failed + (you may not want to show an error if the file already exists) + Returns: + dict: Upload response or None if the upload fails. + """ + + if not file_name: + logger.error("Missing file name!
Cannot upload file.") + return None + + # Make sure we don't have leading or trailing whitespace: + file_name = file_name.strip() + if file_url.startswith("http"): # Download file from remote location specified by the file_url parameter # this must be a public place without authentication: - logger.info("Download file from URL -> %s", file_url) + logger.debug("Download file from URL -> %s", file_url) try: response = requests.get(url=file_url, timeout=1200) @@ -3104,7 +3766,7 @@ def upload_file_to_parent( logger.error("Request error -> %s", err.strerror) return None - logger.info( + logger.debug( "Successfully downloaded file -> %s; status code -> %s", file_url, response.status_code, @@ -3112,7 +3774,7 @@ def upload_file_to_parent( file_content = response.content elif os.path.exists(file_url): - logger.info("Uploading local file -> %s", file_url) + logger.debug("Uploading local file -> %s", file_url) file_content = open(file=file_url, mode="rb") else: @@ -3123,7 +3785,16 @@ def upload_file_to_parent( "type": str(144), "name": file_name, "parent_id": str(parent_id), + "external_create_date": external_create_date, + "external_modify_date": external_modify_date, } + + if description: + upload_post_data["description"] = description + + if not mime_type: + mime_type, _ = mimetypes.guess_type(file_url) + upload_post_files = [("file", (f"{file_name}", file_content, mime_type))] request_url = self.config()["nodesUrlv2"] @@ -3131,8 +3802,8 @@ def upload_file_to_parent( self.cookie() ) # for some reason we have to omit the other header parts here - otherwise we get a 500 response - logger.info( - "Uploading file -> %s with mime type -> %s to parent with ID -> %s; calling -> %s", + logger.debug( + "Uploading file -> '%s' with mime type -> '%s' to parent with ID -> %s; calling -> %s", file_name, mime_type, str(parent_id), @@ -3149,22 +3820,58 @@ def upload_file_to_parent( cookies=self.cookie(), timeout=None, ) - if response.ok: + if response.ok and category_data: + + parsed_response = self.parse_request_response(response) + + node_id = self.get_result_value(parsed_response, "id") + if not node_id: + logger.error("No Node ID found! 
Cannot set category.") + return None + # Update the categories on the documents + for category in category_data: + self.set_category_values( + node_id=node_id, + category_id=category, + category_data=category_data[category], + ) + + return parsed_response + + elif response.ok: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 - else: - logger.error( - "Failed to upload file -> %s to parent -> %s; status -> %s; error -> %s", + elif response.status_code == 500 and "already exists" in response.text: + logger.warning( + "Cannot upload file -> '%s' to parent -> %s (it may already exist); status -> %s; error -> %s", file_url, str(parent_id), response.status_code, response.text, ) return None + else: + if show_error: + logger.error( + "Failed to upload file -> '%s' to parent -> %s; status -> %s; error -> %s", + file_url, + str(parent_id), + response.status_code, + response.text, + ) + else: + logger.warning( + "Could not upload file -> '%s' to parent -> %s; status -> %s; error -> %s", + file_url, + str(parent_id), + response.status_code, + response.text, + ) + return None # end method definition
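A minimal usage sketch of the extended `upload_file_to_parent()` signature, for review convenience. The authenticated client object `otcs`, the parent ID, and the category / attribute IDs are illustrative assumptions, not values taken from this change:

```python
# Hypothetical example - assumes an authenticated pyxecm OTCS client `otcs`
# and that category 12508 is available on the target folder 123456.
response = otcs.upload_file_to_parent(
    file_url="/tmp/contract.pdf",     # local path; a public http(s) URL works as well
    file_name="contract.pdf",
    mime_type="application/pdf",      # if empty, mimetypes.guess_type() is the fallback
    parent_id=123456,
    category_data={
        "12508": {
            "12508_2": "Draft",       # text drop-down
            "12508_4": "2023-05-10",  # date
            "12508_5": True,          # checkbox / bool
        }
    },
    description="Draft version of the contract",
    show_error=False,                 # only warn if the document already exists
)
if response:
    node_id = otcs.get_result_value(response, "id")
```

After a successful upload the method calls `set_category_values()` once per category ID in `category_data`, so the document ends up with its metadata set in a single call.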
@@ -3188,10 +3895,14 @@ def add_document_version( dict: Add version response or None if the upload fails. """ + # Description of a version cannot be longer than 255 characters in OTCS: + if description and len(description) > 255: + description = description[:255] + if file_url.startswith("http"): # Download file from remote location specified by the file_url parameter # this must be a public place without authentication: - logger.info("Download file from URL -> %s", file_url) + logger.debug("Download file from URL -> %s", file_url) try: response = requests.get(
@@ -3212,7 +3923,7 @@ def add_document_version( logger.error("Request error -> %s", err.strerror) return None - logger.info( + logger.debug( "Successfully downloaded file -> %s; status code -> %s", file_url, response.status_code,
@@ -3220,7 +3931,7 @@ def add_document_version( file_content = response.content elif os.path.exists(file_url): - logger.info("Uploading local file -> %s", file_url) + logger.debug("Uploading local file -> %s", file_url) file_content = open(file=file_url, mode="rb") else:
@@ -3235,8 +3946,8 @@ def add_document_version( self.cookie() ) # for some reason we have to omit the other header parts here - otherwise we get a 500 response - logger.info( - "Uploading file -> %s with mime type -> %s as new version to document node with ID -> %s; calling -> %s", + logger.debug( + "Uploading file -> '%s' with mime type -> '%s' as new version to document node with ID -> %s; calling -> %s", file_name, mime_type, node_id,
@@ -3257,12 +3968,12 @@ def add_document_version( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to add file -> %s as new version to document with ID -> %s; status -> %s; error -> %s", + "Failed to add file -> '%s' as new version to document with ID -> %s; status -> %s; error -> %s", file_url, str(node_id), response.status_code,
@@ -3286,7 +3997,7 @@ def get_latest_document_version(self, node_id: int) -> dict | None: ) request_header = self.request_form_header() - logger.info( + logger.debug( "Get latest version of document with node ID -> %s; calling -> %s", str(node_id), request_url,
@@ -3304,7 +4015,7 @@ def get_latest_document_version(self, node_id: int) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else:
@@ -3348,7 +4059,7 @@ def get_document_content(self, node_id: int, version_number: str = "") -> bytes: ) request_header = self.request_download_header() - logger.info( + logger.debug( "Retrieve document with node ID -> %s and version -> %s; calling -> %s", str(node_id), str(version_number),
@@ -3368,16 +4079,20 @@ def get_document_content(self, node_id: int, version_number: str = "") -> bytes: break # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 - else: + elif response is not None: logger.error( "Failed to download document with node ID -> %s; status -> %s; error -> %s", str(node_id), response.status_code, response.text, ) + else: + logger.error( + "Failed to download document with node ID -> %s", str(node_id) + ) return None return content
@@ -3402,7 +4117,7 @@ def download_document( directory = os.path.dirname(file_path) if not os.path.exists(directory): - logger.error("Directory -> %s does not exist", directory) + logger.error("Directory -> '%s' does not exist", directory) return False if not version_number:
@@ -3423,7 +4138,7 @@ def download_document( ) request_header = self.request_download_header() - logger.info( + logger.debug( "Download document with node ID -> %s; calling -> %s", str(node_id), request_url,
@@ -3442,7 +4157,7 @@ def download_document( break # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else:
@@ -3454,7 +4169,7 @@ def download_document( ) return False - logger.info("Writing document content to file -> %s", file_path) + logger.debug("Writing document content to file -> %s", file_path) # Open file in write binary mode with open(file=file_path, mode="wb") as file:
@@ -3487,7 +4202,7 @@ def download_config_file( # request_header = self.cookie() request_header = self.request_download_header() - logger.info("Download config file from URL -> %s", request_url) + logger.debug("Download config file from URL -> %s", request_url) try: response = requests.get(
@@ -3513,8 +4228,8 @@ def download_config_file( content = response.content if search: - logger.info( - "Search for all occurances of %s in the config file and replace them with %s", + logger.debug( + "Search for all occurrences of '%s' in the config file and replace them with '%s'", search, replace, )
@@ -3525,8 +4240,8 @@ def download_config_file( # Write the content to the file file.write(content)
- logger.info( - "Successfully downloaded config file -> %s to -> %s; status code -> %s", + logger.debug( + "Successfully downloaded config file -> %s to -> '%s'; status code -> %s", request_url, file_path, response.status_code, @@ -3585,7 +4300,7 @@ def search( request_url = self.config()["searchUrl"] request_header = self.request_form_header() - logger.info("Search for term -> %s; calling -> %s", search_term, request_url) + logger.debug("Search for term -> %s; calling -> %s", search_term, request_url) retries = 0 while True: @@ -3600,7 +4315,7 @@ def search( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -3625,13 +4340,14 @@ def get_external_system_connection( Returns: dict: External system Details or None if the REST call fails. """ - + # Encode special characters in connection_name + connection_name = connection_name.replace("\\", "0xF0A6").replace("/", "0xF0A7") request_url = ( self.config()["externalSystemUrl"] + "/" + connection_name + "/config" ) request_header = self.cookie() - logger.info( + logger.debug( "Get external system connection -> %s; calling -> %s", connection_name, request_url, @@ -3649,7 +4365,7 @@ def get_external_system_connection( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -3661,7 +4377,7 @@ def get_external_system_connection( response.text, ) else: - logger.info("External system -> %s not found.", connection_name) + logger.debug("External system -> '%s' not found.", connection_name) return None # end method definition @@ -3713,8 +4429,8 @@ def add_external_system_connection( request_url = self.config()["externalSystemUrl"] request_header = self.cookie() - logger.info( - "Creating external system connection -> %s of type -> %s with URL -> %s; calling -> %s", + logger.debug( + "Creating external system connection -> '%s' of type -> '%s' with URL -> %s; calling -> %s", connection_name, connection_type, as_url, @@ -3734,7 +4450,7 @@ def add_external_system_connection( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -3762,7 +4478,7 @@ def create_transport_workbench(self, workbench_name: str) -> dict | None: request_url = self.config()["nodesUrlv2"] request_header = self.request_form_header() - logger.info( + logger.debug( "Create transport workbench -> %s; calling -> %s", workbench_name, request_url, @@ -3780,7 +4496,7 @@ def create_transport_workbench(self, workbench_name: str) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + 
logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -3811,7 +4527,7 @@ def unpack_transport_package( request_url = self.config()["nodesUrlv2"] + "/" + str(package_id) + "/unpack" request_header = self.request_form_header() - logger.info( + logger.debug( "Unpack transport package with ID -> %s into workbench with ID -> %s; calling -> %s", str(package_id), str(workbench_id), @@ -3831,7 +4547,7 @@ def unpack_transport_package( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -3858,7 +4574,7 @@ def deploy_workbench(self, workbench_id: int) -> dict | None: request_url = self.config()["nodesUrlv2"] + "/" + str(workbench_id) + "/deploy" request_header = self.request_form_header() - logger.info( + logger.debug( "Deploy workbench with ID -> %s; calling -> %s", str(workbench_id), request_url, @@ -3889,7 +4605,7 @@ def deploy_workbench(self, workbench_id: int) -> dict | None: return response_dict # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -3943,7 +4659,7 @@ def deploy_transport( if not transport_root_volume_id: logger.error("Failed to retrieve transport root volume") return None - logger.info("Transport root volume ID -> %s", str(transport_root_volume_id)) + logger.debug("Transport root volume ID -> %s", str(transport_root_volume_id)) response = self.get_node_by_parent_and_name( transport_root_volume_id, "Transport Packages" @@ -3952,48 +4668,50 @@ def deploy_transport( if not transport_package_volume_id: logger.error("Failed to retrieve transport package volume") return None - logger.info( + logger.debug( "Transport package volume ID -> %s", str(transport_package_volume_id) ) # Step 1: Upload Transport Package - logger.info("Check if transport package -> %s already exists...", package_name) + logger.debug( + "Check if transport package -> '%s' already exists...", package_name + ) response = self.get_node_by_parent_and_name( transport_package_volume_id, package_name ) package_id = self.get_result_value(response, "id") if package_id: - logger.info( - "Transport package -> %s does already exist; existing package ID -> %s", + logger.debug( + "Transport package -> '%s' does already exist; existing package ID -> %s", package_name, str(package_id), ) else: - logger.info( - "Transport package -> %s does not yet exist, loading from -> %s", + logger.debug( + "Transport package -> '%s' does not yet exist, loading from -> %s", package_name, package_url, ) # If we have string replacements configured execute them now: if replacements: - logger.info( - "Transport -> %s has replacements -> %s", + logger.debug( + "Transport -> '%s' has replacements -> %s", package_name, str(replacements), ) self.replace_transport_placeholders(package_url, replacements) else: - logger.info("Transport -> %s has no replacements!", package_name) + logger.debug("Transport -> '%s' has no replacements!", package_name) # If we have data extractions configured execute them now: if extractions: - 
logger.info( - "Transport -> %s has extractions -> %s", + logger.debug( + "Transport -> '%s' has extractions -> %s", package_name, str(extractions), ) self.extract_transport_data(package_url, extractions) else: - logger.info("Transport -> %s has no extractions!", package_name) + logger.debug("Transport -> '%s' has no extractions!", package_name) # Upload package to Extended ECM: response = self.upload_file_to_volume( package_url, package_name, "application/zip", 531 @@ -4002,15 +4720,17 @@ def deploy_transport( if not package_id: logger.error("Failed to upload transport package -> %s", package_url) return None - logger.info( - "Successfully uploaded transport package -> %s; new package ID -> %s", + logger.debug( + "Successfully uploaded transport package -> '%s'; new package ID -> %s", package_name, str(package_id), ) # Step 2: Create Transport Workbench (if not yet exist) workbench_name = package_name.split(".")[0] - logger.info("Check if workbench -> %s is already deployed...", workbench_name) + logger.debug( + "Check if workbench -> '%s' is already deployed...", workbench_name + ) # check if the package name has the suffix "(deployed)" - this indicates it is alreadey # successfully deployed (see renaming at the end of this method) response = self.get_node_by_parent_and_name( @@ -4018,22 +4738,22 @@ def deploy_transport( ) workbench_id = self.get_result_value(response, "id") if workbench_id: - logger.info( - "Workbench -> %s has already been deployed successfully; existing workbench ID -> %s; skipping transport", + logger.debug( + "Workbench -> '%s' has already been deployed successfully; existing workbench ID -> %s; skipping transport", workbench_name, str(workbench_id), ) # we return and skip this transport... return response else: - logger.info("Check if workbench -> %s already exists...", workbench_name) + logger.debug("Check if workbench -> '%s' already exists...", workbench_name) response = self.get_node_by_parent_and_name( transport_root_volume_id, workbench_name ) workbench_id = self.get_result_value(response, "id") if workbench_id: - logger.info( - "Workbench -> %s does already exist but is not successfully deployed; existing workbench ID -> %s", + logger.debug( + "Workbench -> '%s' does already exist but is not successfully deployed; existing workbench ID -> %s", workbench_name, str(workbench_id), ) @@ -4041,17 +4761,17 @@ def deploy_transport( response = self.create_transport_workbench(workbench_name) workbench_id = self.get_result_value(response, "id") if not workbench_id: - logger.error("Failed to create workbench -> %s", workbench_name) + logger.error("Failed to create workbench -> '%s'", workbench_name) return None - logger.info( - "Successfully created workbench -> %s; new workbench ID -> %s", + logger.debug( + "Successfully created workbench -> '%s'; new workbench ID -> %s", workbench_name, str(workbench_id), ) # Step 3: Unpack Transport Package to Workbench - logger.info( - "Unpack transport package -> %s (%s) to workbench -> %s (%s)", + logger.debug( + "Unpack transport package -> '%s' (%s) to workbench -> '%s' (%s)", package_name, str(package_id), workbench_name, @@ -4059,23 +4779,23 @@ def deploy_transport( ) response = self.unpack_transport_package(package_id, workbench_id) if not response: - logger.error("Failed to unpack the transport package -> %s", package_name) + logger.error("Failed to unpack the transport package -> '%s'", package_name) return None - logger.info( - "Successfully unpackaged to workbench -> %s (%s)", + logger.debug( + "Successfully unpackaged 
to workbench -> '%s' (%s)", workbench_name, str(workbench_id), ) # Step 4: Deploy Workbench - logger.info("Deploy workbench -> %s (%s)", workbench_name, str(workbench_id)) + logger.debug("Deploy workbench -> '%s' (%s)", workbench_name, str(workbench_id)) response = self.deploy_workbench(workbench_id) if not response: - logger.error("Failed to deploy workbench -> %s", workbench_name) + logger.error("Failed to deploy workbench -> '%s'", workbench_name) return None - logger.info( - "Successfully deployed workbench -> %s (%s)", + logger.debug( + "Successfully deployed workbench -> '%s' (%s)", workbench_name, str(workbench_id), ) @@ -4104,7 +4824,7 @@ def replace_transport_placeholders( """ if not os.path.isfile(zip_file_path): - logger.error("Zip file -> %s not found.", zip_file_path) + logger.error("Zip file -> '%s' not found.", zip_file_path) return False # Extract the zip file to a temporary directory @@ -4122,8 +4842,8 @@ def replace_transport_placeholders( ) continue if "enabled" in replacement and not replacement["enabled"]: - logger.info( - "Replacement for transport -> %s is disabled. Skipping...", + logger.debug( + "Replacement for transport -> '%s' is disabled. Skipping...", zip_file_path, ) continue @@ -4131,17 +4851,17 @@ def replace_transport_placeholders( # 1. XPath - more elegant and powerful # 2. Search & Replace - basically treat the XML file like a text file and do a search & replace if "xpath" in replacement: - logger.info( + logger.debug( "Using xpath -> %s to narrow down the replacement", replacement["xpath"], ) if "setting" in replacement: - logger.info( + logger.debug( "Looking up setting -> %s in XML element", replacement["setting"], ) if "assoc_elem" in replacement: - logger.info( + logger.debug( "Looking up assoc element -> %s in XML element", replacement["assoc_elem"], ) @@ -4152,12 +4872,12 @@ def replace_transport_placeholders( ) continue if replacement.get("placeholder") == replacement["value"]: - logger.info( + logger.debug( "Placeholder and replacement are identical -> %s. 
Skipping...", replacement["value"], ) continue - logger.info( + logger.debug( "Replace -> %s with -> %s in Transport package -> %s", replacement["placeholder"], replacement["value"], @@ -4173,7 +4893,7 @@ def replace_transport_placeholders( replacement.get("assoc_elem"), ) if found: - logger.info( + logger.debug( "Replacement -> %s has been completed successfully for Transport package -> %s", replacement, zip_file_folder, @@ -4197,8 +4917,8 @@ def replace_transport_placeholders( new_zip_file_path = ( os.path.dirname(zip_file_path) + "/new_" + os.path.basename(zip_file_path) ) - logger.info( - "Content of transport -> %s has been modified - repacking to new zip file -> %s", + logger.debug( + "Content of transport -> '%s' has been modified - repacking to new zip file -> %s", zip_file_folder, new_zip_file_path, ) @@ -4216,14 +4936,14 @@ def replace_transport_placeholders( old_zip_file_path = ( os.path.dirname(zip_file_path) + "/old_" + os.path.basename(zip_file_path) ) - logger.info( - "Rename orginal transport zip file -> %s to -> %s", + logger.debug( + "Rename orginal transport zip file -> '%s' to -> '%s'", zip_file_path, old_zip_file_path, ) os.rename(zip_file_path, old_zip_file_path) - logger.info( - "Rename new transport zip file -> %s to -> %s", + logger.debug( + "Rename new transport zip file -> '%s' to -> '%s'", new_zip_file_path, zip_file_path, ) @@ -4248,7 +4968,7 @@ def extract_transport_data(self, zip_file_path: str, extractions: list) -> bool: """ if not os.path.isfile(zip_file_path): - logger.error("Zip file -> %s not found.", zip_file_path) + logger.error("Zip file -> '%s' not found.", zip_file_path) return False # Extract the zip file to a temporary directory @@ -4264,14 +4984,14 @@ def extract_transport_data(self, zip_file_path: str, extractions: list) -> bool: ) continue if "enabled" in extraction and not extraction["enabled"]: - logger.info( - "Extraction for transport -> %s is disabled. Skipping...", + logger.debug( + "Extraction for transport -> '%s' is disabled. 
Skipping...", zip_file_path, ) continue xpath = extraction["xpath"] - logger.info( + logger.debug( "Using xpath -> %s to extract the data", xpath, ) @@ -4282,7 +5002,7 @@ def extract_transport_data(self, zip_file_path: str, extractions: list) -> bool: xpath, ) if extracted_data: - logger.info( + logger.debug( "Extraction with XPath -> %s has been successfully completed for Transport package -> %s", xpath, zip_file_folder, @@ -4315,7 +5035,7 @@ def get_business_object_types(self) -> dict | None: request_url = self.config()["businessObjectTypesUrl"] request_header = self.request_form_header() - logger.info( + logger.debug( "Get all business object types; calling -> %s", request_url, ) @@ -4332,7 +5052,7 @@ def get_business_object_types(self) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -4425,8 +5145,8 @@ def get_business_object_type( ) request_header = self.request_form_header() - logger.info( - "Get business object type -> %s for external system -> %s; calling -> %s", + logger.debug( + "Get business object type -> '%s' for external system -> %s; calling -> %s", type_name, external_system_id, request_url, @@ -4444,12 +5164,12 @@ def get_business_object_type( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to get business object type -> %s; status -> %s; error -> %s", + "Failed to get business object type -> '%s'; status -> %s; error -> %s", type_name, response.status_code, response.text, @@ -4547,8 +5267,8 @@ def get_business_objects( request_url = self.config()["businessObjectsUrl"] + "?{}".format(encoded_query) request_header = self.request_form_header() - logger.info( - "Get all business objects of type -> %s from external system -> %s; calling -> %s", + logger.debug( + "Get all business objects of type -> '%s' from external system -> %s; calling -> %s", type_name, external_system_id, request_url, @@ -4566,12 +5286,12 @@ def get_business_objects( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to get business objects of type -> %s from external system -> %s; status -> %s; error -> %s", + "Failed to get business objects of type -> '%s' from external system -> %s; status -> %s; error -> %s", type_name, external_system_id, response.status_code, @@ -4609,8 +5329,8 @@ def get_business_objects_search( ) request_header = self.request_form_header() - logger.info( - "Get search form for business object type -> %s and external system -> %s; calling -> %s", + logger.debug( + "Get search form for business object type -> '%s' and external system -> %s; calling -> %s", type_name, external_system_id, request_url, @@ -4628,12 
@@ -4628,12 +5348,12 @@ def get_business_objects_search( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to get search form for business object type -> %s and external system -> %s; status -> %s; error -> %s", + "Failed to get search form for business object type -> '%s' and external system -> %s; status -> %s; error -> %s", type_name, external_system_id, response.status_code,
@@ -4651,8 +5371,8 @@ def get_workspace_types( Args: expand_workspace_info (bool, optional): Controls if the workspace info is returned as well - expand_workspace_info (bool, optional): Controls if the list of workspace templates - per workspace typ is returned as well + expand_templates (bool, optional): Controls if the list of workspace templates + per workspace type is returned as well Returns: dict: Workspace Types or None if the request fails.
@@ -4698,7 +5418,7 @@ def get_workspace_types( request_header = self.request_form_header() - logger.info("Get workspace types; calling -> %s", request_url) + logger.debug("Get workspace types; calling -> %s", request_url) retries = 0 while True:
@@ -4712,7 +5432,7 @@ def get_workspace_types( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else:
@@ -4728,18 +5448,18 @@ def get_workspace_create_form( self, template_id: int, - external_system_id: int = None, - bo_type: int = None, - bo_id: int = None, + external_system_id: str = None, + bo_type: str = None, + bo_id: str = None, parent_id: int = None, ) -> dict | None: """Get the Workspace create form. Args: template_id (int): ID of the workspace template - external_system_id (int, optional): Identifier of the external system (None if no external system) - bo_type (int, optional): Business object type (None if no external system) - bo_id (int, optional): Business object identifier / key (None if no external system) + external_system_id (str, optional): Identifier of the external system (None if no external system) + bo_type (str, optional): Business object type (None if no external system) + bo_id (str, optional): Business object identifier / key (None if no external system) parent_id (int, optional): Parent ID of the workspaces. Needs only be specified in special cases where workspace location cannot be derived from workspace type definition, e.g.
sub-workspace @@ -4758,15 +5478,15 @@ def get_workspace_create_form( request_url += "&ext_system_id={}".format(external_system_id) request_url += "&bo_type={}".format(bo_type) request_url += "&bo_id={}".format(bo_id) - logger.info( + logger.debug( "Include business object connection -> (%s, %s, %s) in workspace create form...", - str(external_system_id), - str(bo_type), - str(bo_id), + external_system_id, + bo_type, + bo_id, ) request_header = self.request_form_header() - logger.info( + logger.debug( "Get workspace create form for workspace template ID -> %s; calling -> %s", str(template_id), request_url, @@ -4784,16 +5504,25 @@ def get_workspace_create_form( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: - logger.error( - "Failed to get workspace create form for template -> %s; status -> %s; error -> %s", - str(template_id), - response.status_code, - response.text, - ) + if parent_id: + logger.error( + "Failed to get workspace create form for template -> %s and parent ID -> %s; status -> %s; error -> %s", + str(template_id), + str(parent_id), + response.status_code, + response.text, + ) + else: + logger.error( + "Failed to get workspace create form for template -> %s (no parent ID); status -> %s; error -> %s", + str(template_id), + response.status_code, + response.text, + ) return None # end method definition @@ -4878,7 +5607,9 @@ def get_workspace(self, node_id: int) -> dict | None: } }, 'metadata': {...}, - 'metadata_order': {...} + 'metadata_order': { + 'categories': ['16878'] + } } ], 'wksp_info': { @@ -4901,7 +5632,7 @@ def get_workspace(self, node_id: int) -> dict | None: request_url = self.config()["businessWorkspacesUrl"] + "/" + str(node_id) request_header = self.request_form_header() - logger.info( + logger.debug( "Get workspace with ID -> %s; calling -> %s", str(node_id), request_url ) @@ -4917,7 +5648,7 @@ def get_workspace(self, node_id: int) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -4961,6 +5692,8 @@ def get_workspace_by_type_and_name( type_id: int = None, name: str = "", expanded_view: bool = True, + page: int | None = None, + limit: int | None = None, ) -> dict | None: """Lookup workspace based on workspace type and workspace name. @@ -4977,6 +5710,12 @@ def get_workspace_by_type_and_name( dict: Workspace information or None if the workspace is not found. """ + if not type_name and not type_id: + logger.error( + "No workspace type specified - neither by type name nor type ID. Cannot lookup workspace(s)!" + ) + return None + # Add query parameters (these are NOT passed via JSon body!) 
query = { "expanded_view": expanded_view, }
@@ -4987,6 +5726,9 @@ query["where_workspace_type_id"] = type_id if name: query["where_name"] = name + if page and limit: + query["page"] = page + query["limit"] = limit encoded_query = urllib.parse.urlencode(query, doseq=True)
@@ -4996,18 +5738,25 @@ request_header = self.request_form_header() if name: - logger.info( - "Get workspace with name -> %s and type -> %s; calling -> %s", + logger.debug( + "Get workspace with name -> '%s' and type -> '%s'; calling -> %s", name, type_name, request_url, ) else: - logger.info( - "Get all workspace instances of type -> %s; calling -> %s", - type_name, - request_url, - ) + if type_name: + logger.debug( + "Get all workspace instances of type -> '%s'; calling -> %s", + type_name, + request_url, + ) + else: + logger.debug( + "Get all workspace instances with type ID -> %s; calling -> %s", + str(type_id), + request_url, + ) retries = 0 while True:
@@ -5021,13 +5770,13 @@ return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: if name: logger.warning( - "Failed to get workspace -> %s of type -> %s; status -> %s; error -> %s", + "Failed to get workspace -> '%s' of type -> '%s'; status -> %s; error -> %s", name, type_name, response.status_code,
@@ -5035,7 +5784,7 @@ ) else: logger.warning( - "Failed to get workspace instances of type -> %s; status -> %s; error -> %s", + "Failed to get workspace instances of type -> '%s'; status -> %s; error -> %s", type_name, response.status_code, response.text,
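A usage sketch for the new paging parameters; the workspace type, workspace name and the client object `otcs` are illustrative assumptions:

```python
# Hypothetical example - look up a single workspace instance by type and name:
response = otcs.get_workspace_by_type_and_name(
    type_name="Customer", name="ACME Inc."
)
workspace_id = otcs.get_result_value(response, "id")

# With page / limit only the first instance of the type is fetched.
# This is what the new get_workspace_type_location() below uses to
# derive the parent folder of a workspace type:
response = otcs.get_workspace_by_type_and_name(
    type_name="Customer", page=1, limit=1
)
parent_id = otcs.get_result_value(response, "parent_id")
```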
@@ -5044,6 +5793,36 @@ # end method definition + def get_workspace_type_location( + self, + type_name: str = "", + type_id: int = None, + ) -> int | None: + """Determine the folder in which the workspace instances of a given type reside. Either the type ID or the type name need to be provided. NOTE: workspace types + may not always have a default location for all its instances. In that case + parent_id may just be the folder of the first delivered workspace instance. + + Args: + type_name (str, optional): Name of the workspace type. Defaults to "". + type_id (int, optional): ID of the workspace type. Defaults to None. + + Returns: + int | None: node ID of the parent folder + """ + + # it seems there's no other way to get the workspace location configured for a + # workspace type other than getting an example workspace of this type and see what + # the parent is. The REST API used for get_workspace_types() does not deliver this information :-( + response = self.get_workspace_by_type_and_name( + type_name=type_name, type_id=type_id, page=1, limit=1 + ) + parent_id = self.get_result_value(response=response, key="parent_id") + + return parent_id + + # end method definition + def get_workspace_by_business_object( self, external_system_name: str,
@@ -5138,8 +5917,8 @@ request_header = self.request_form_header() - logger.info( - "Get workspace via external system -> %s (Business Object Type -> %s; Business Object ID -> %s); calling -> %s", + logger.debug( + "Get workspace via external system -> '%s' (Business Object Type -> '%s'; Business Object ID -> %s); calling -> %s", external_system_name, business_object_type, business_object_id,
@@ -5158,13 +5937,13 @@ return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: if show_error: logger.error( - "Failed to get workspace via external system -> %s (Business Object Type -> %s; Business Object ID -> %s); status -> %s; error -> %s", + "Failed to get workspace via external system -> '%s' (Business Object Type -> '%s'; Business Object ID -> %s); status -> %s; error -> %s", external_system_name, business_object_type, business_object_id,
@@ -5172,8 +5951,8 @@ response.text, ) else: - logger.info( - "Cannot fing workspace via external system -> %s (Business Object Type -> %s; Business Object ID -> %s); status -> %s; error -> %s", + logger.debug( + "Cannot find workspace via external system -> '%s' (Business Object Type -> '%s'; Business Object ID -> %s); status -> %s; error -> %s", external_system_name, business_object_type, business_object_id,
@@ -5184,6 +5963,96 @@ # end method definition + def set_workspace_reference( + self, + workspace_id: int, + external_system_id: str | None = None, + bo_type: str | None = None, + bo_id: str | None = None, + show_error: bool = True, + ) -> dict | None: + """Set reference of workspace to a business object in an external system. + + Args: + workspace_id (int): ID of the workspace + external_system_id (str, optional): Identifier of the external system (None if no external system) + bo_type (str, optional): Business object type (None if no external system) + bo_id (str, optional): Business object identifier / key (None if no external system) + show_error (bool, optional): Log an error if setting the workspace reference fails. Otherwise log a warning. + Returns: + dict | None: Response of the REST API call or None if the request fails. + """ + + request_url = ( + self.config()["businessWorkspacesUrl"] + + "/" + + str(workspace_id) + + "/workspacereferences" + ) + request_header = self.request_form_header() + + if not external_system_id or not bo_type or not bo_id: + logger.error( + "Cannot update workspace reference - required Business Object information is missing!"
+ ) + return None + + logger.debug( + "Update workspace reference of workspace ID -> %s with business object connection -> (%s, %s, %s); calling -> %s", + str(workspace_id), + external_system_id, + bo_type, + bo_id, + request_url, + ) + + workspace_put_data = { + "ext_system_id": external_system_id, + "bo_type": bo_type, + "bo_id": bo_id, + } + + retries = 0 + while True: + # This REST API needs a special treatment: we encapsulate the payload as JSON into a "body" tag. + # See https://developer.opentext.com/apis/14ba85a7-4693-48d3-8c93-9214c663edd2/4403207c-40f1-476a-b794-fdb563e37e1f/07229613-7ef4-4519-8b8a-47eaff639d42#operation/createBusinessWorkspace + response = requests.put( + url=request_url, + headers=request_header, + data=workspace_put_data, + cookies=self.cookie(), + timeout=None, + ) + if response.ok: + return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + retries += 1 + else: + if show_error: + logger.error( + "Failed to update reference for workspace ID -> %s with business object connection -> (%s, %s, %s); status -> %s; error -> %s", + str(workspace_id), + external_system_id, + bo_type, + bo_id, + response.status_code, + response.text, + ) + else: + logger.warning( + "Couldn't update reference for workspace ID -> %s with business object connection -> (%s, %s, %s); status -> %s; error -> %s", + str(workspace_id), + external_system_id, + bo_type, + bo_id, + response.status_code, + response.text, + ) + return None + + # end method definition + def create_workspace( self, workspace_template_id: int, @@ -5191,11 +6060,13 @@ def create_workspace( workspace_description: str, workspace_type: int, category_data: dict | None = None, - external_system_id: int = None, - bo_type: int | None = None, - bo_id: int | None = None, + external_system_id: str | None = None, + bo_type: str | None = None, + bo_id: str | None = None, parent_id: int | None = None, ibo_workspace_id: int | None = None, + external_modify_date: str | None = None, + external_create_date: str | None = None, show_error: bool = True, ) -> dict | None: """Create a new business workspace. @@ -5211,10 +6082,12 @@ def create_workspace( bo_id (str, optional): Business object identifier / key (None if no external system) parent_id (str, optional): Parent ID of the workspaces. Needs only be specified in special cases where workspace location cannot be derived from workspace - type definition + type definition or if it is a sub-workspace. ibo_workspace_id (int, optional): Node ID of an existing workspace that is already connected to another external system. This allows for subsequent calls to coonect the workspace to multiple Business Objects (IBO = Identical Business Objects) + external_create_date (str, optional) value of the source system in format 2024-06-24 + external_modify_date (str, optional) value of the source system in format 2024-06-24 show_error (bool, optional): Log an error if workspace cration fails. Otherwise log a warning. Returns: dict: Workspace Create Form data or None if the request fails. @@ -5231,22 +6104,24 @@ def create_workspace( "wksp_type_id": str(workspace_type), "type": str(848), "roles": category_data, + "external_create_date": external_create_date, + "external_modify_date": external_modify_date, } # Is this workspace connected to a business application / external system? 
if external_system_id and bo_type and bo_id: - create_workspace_post_data["ext_system_id"] = str(external_system_id) - create_workspace_post_data["bo_type"] = str(bo_type) - create_workspace_post_data["bo_id"] = str(bo_id) - logger.info( - "Use business object connection -> (%s, %s, %s) for workspace -> %s", - str(external_system_id), - str(bo_type), - str(bo_id), + create_workspace_post_data["ext_system_id"] = external_system_id + create_workspace_post_data["bo_type"] = bo_type + create_workspace_post_data["bo_id"] = bo_id + logger.debug( + "Use business object connection -> (%s, %s, %s) for workspace -> '%s'", + external_system_id, + bo_type, + bo_id, workspace_name, ) if ibo_workspace_id: - logger.info( + logger.debug( "This is a subsequent call to create a cross-application workspace (IBO)" ) create_workspace_post_data["ibo_workspace_id"] = ibo_workspace_id @@ -5256,14 +6131,14 @@ def create_workspace( # also be the case if workspaces are nested into each other: if parent_id is not None: create_workspace_post_data["parent_id"] = parent_id - logger.info( - "Use specified location -> %s for workspace -> %s", + logger.debug( + "Use specified location -> %s for workspace -> '%s'", str(parent_id), workspace_name, ) else: - logger.info( - "Determine location of workspace -> %s via workspace type -> %s", + logger.debug( + "Determine location of workspace -> %s via workspace type -> '%s'", workspace_name, str(workspace_type), ) @@ -5271,8 +6146,8 @@ def create_workspace( request_url = self.config()["businessWorkspacesUrl"] request_header = self.request_form_header() - logger.info( - "Create workspace -> %s with type -> %s from template -> %s; calling -> %s", + logger.debug( + "Create workspace -> '%s' with type -> '%s' from template -> %s; calling -> %s", workspace_name, str(workspace_type), str(workspace_template_id), @@ -5294,13 +6169,23 @@ def create_workspace( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 + elif response.status_code == 500 and "already exists" in response.text: + logger.warning( + "Could not create workspace -> '%s' from template with ID -> %s (it already exists); status -> %s; error -> %s", + workspace_name, + str(workspace_template_id), + response.status_code, + response.text, + ) + return self.parse_request_response(response) + # return None else: if show_error: logger.error( - "Failed to create workspace -> %s from template with ID -> %s; status -> %s; error -> %s", + "Failed to create workspace -> '%s' from template with ID -> %s; status -> %s; error -> %s", workspace_name, str(workspace_template_id), response.status_code, @@ -5308,7 +6193,7 @@ def create_workspace( ) else: logger.warning( - "Couldn't create workspace -> %s from template with ID -> %s (it may exist already); status -> %s; error -> %s", + "Failed to create workspace -> '%s' from template with ID -> %s; status -> %s; error -> %s", workspace_name, str(workspace_template_id), response.status_code, @@ -5318,11 +6203,70 @@ def create_workspace( # end method definition + def update_workspace( + self, + workspace_id: int, + workspace_name: str | None = None, + workspace_description: str | None = None, + category_data: dict | None = None, + external_system_id: str | None = None, + bo_type: str | None = None, + 
bo_id: str | None = None, + external_modify_date: str | None = None, + external_create_date: str | None = None, + show_error: bool = True, + ) -> bool: + """Update an existing business workspace. This is a wrapper method to update + a combination of workspace name / description, workspace reference, and workspace metadata. + + Args: + workspace_id (int): ID of the workspace + workspace_name (str): New name of the workspace + workspace_description (str): New description of the workspace + category_data (dict): Category and attributes + external_system_id (str, optional): Identifier of the external system (None if no external system) + bo_type (str, optional): Business object type (None if no external system) + bo_id (str, optional): Business object identifier / key (None if no external system) + external_create_date (str, optional): value of the source system in format 2024-06-24 + external_modify_date (str, optional): value of the source system in format 2024-06-24 + show_error (bool, optional): Log an error if workspace creation fails. Otherwise log a warning. + Returns: + bool: True if the update(s) succeeded, False otherwise. + """ + + # Should we connect this workspace to a business application / external system? + if external_system_id and bo_type and bo_id: + response = self.set_workspace_reference( + workspace_id=workspace_id, + external_system_id=external_system_id, + bo_type=bo_type, + bo_id=bo_id, + show_error=show_error, + ) + if not response: + return False + + # Should we change the name and/or the description or the category data of this workspace? + if workspace_name or workspace_description or category_data: + response = self.update_item( + node_id=workspace_id, + item_name=workspace_name, + item_description=workspace_description, + category_data=category_data, + external_create_date=external_create_date, + external_modify_date=external_modify_date, + ) + if not response: + return False + + return True + + # end method definition
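A usage sketch of the new `update_workspace()` wrapper. The workspace ID and the SAP connection details are illustrative assumptions, not values from this change:

```python
# Hypothetical example - rename a workspace and connect it to a business object:
success = otcs.update_workspace(
    workspace_id=223344,
    workspace_name="ACME Inc. (renamed)",
    workspace_description="Updated via payload sync",
    external_system_id="SAP-S4",  # connect the workspace to a business object...
    bo_type="KNA1",               # ...of this type...
    bo_id="0000001234",           # ...with this key
)
if not success:
    logger.warning("Workspace update failed")
```

Note that the wrapper returns a bool and delegates to `set_workspace_reference()` and `update_item()` internally.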
""" @@ -5344,7 +6290,7 @@ def create_workspace_relationship( ] + "/{}/relateditems".format(workspace_id) request_header = self.request_form_header() - logger.info( + logger.debug( "Create workspace relationship between -> %s and -> %s; calling -> %s", str(workspace_id), str(related_workspace_id), @@ -5364,26 +6310,45 @@ def create_workspace_relationship( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: - logger.error( - "Failed to create workspace relationship between -> %s and -> %s; status -> %s; error -> %s", - str(workspace_id), - str(related_workspace_id), - response.status_code, - response.text, - ) + if show_error: + logger.error( + "Failed to create workspace relationship between -> %s and -> %s; status -> %s; error -> %s", + str(workspace_id), + str(related_workspace_id), + response.status_code, + response.text, + ) + else: + logger.warning( + "Cannot create workspace relationship between -> %s and -> %s (it may exist already); status -> %s; error -> %s", + str(workspace_id), + str(related_workspace_id), + response.status_code, + response.text, + ) return None # end method definition - def get_workspace_relationships(self, workspace_id: int) -> dict | None: - """Get the Workspace relationships to other workspaces. + def get_workspace_relationships( + self, + workspace_id: int, + relationship_type: str | None = None, + related_workspace_name: str | None = None, + related_workspace_type_id: int | None = None, + ) -> dict | None: + """Get the Workspace relationships to other workspaces. Optionally, filter criterias can be provided + such as the related workspace name (starts with) or the related workspace TYPE ids (one or multiple) Args: workspace_id (int): ID of the workspace template + relationship_type (str): Either "parent" or "child" (or None = unspecified which is the default) + related_workspace_name (str, optional): filter for a certain workspace name in the related items. + related_workspace_type_id (int | None): ID of related workspace type (or list of IDs) Returns: dict: Workspace relationships or None if the request fails. 
""" @@ -5394,9 +6359,24 @@ def get_workspace_relationships(self, workspace_id: int) -> dict | None: + str(workspace_id) + "/relateditems" ) + + query = {} + + if relationship_type: + query["where_relationtype"] = relationship_type + + if related_workspace_name: + query["where_name"] = related_workspace_name + + if related_workspace_type_id: + query["where_workspace_type_id"] = related_workspace_type_id + + encoded_query = urllib.parse.urlencode(query, doseq=False) + request_url += "?{}".format(encoded_query) + request_header = self.request_form_header() - logger.info( + logger.debug( "Get related workspaces for workspace with ID -> %s; calling -> %s", str(workspace_id), request_url, @@ -5414,7 +6394,7 @@ def get_workspace_relationships(self, workspace_id: int) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -5432,7 +6412,7 @@ def get_workspace_roles(self, workspace_id: int) -> dict | None: """Get the Workspace roles. Args: - workspace_id (int): ID of the workspace template + workspace_id (int): ID of the workspace template or workspace Returns: dict: Workspace Roles data or None if the request fails. """ @@ -5442,7 +6422,7 @@ def get_workspace_roles(self, workspace_id: int) -> dict | None: ) request_header = self.request_form_header() - logger.info( + logger.debug( "Get workspace roles of workspace with ID -> %s; calling -> %s", str(workspace_id), request_url, @@ -5460,7 +6440,7 @@ def get_workspace_roles(self, workspace_id: int) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -5474,7 +6454,54 @@ def get_workspace_roles(self, workspace_id: int) -> dict | None: # end method definition - def add_member_to_workspace( + def get_workspace_members(self, workspace_id: int, role_id: int) -> dict | None: + """Get the Workspace members of a given role. + + Args: + workspace_id (int): ID of the workspace template + role_id (int): ID of the role + Returns: + dict: Workspace member data or None if the request fails. 
+ """ + + request_url = self.config()[ + "businessWorkspacesUrl" + ] + "/{}/roles/{}/members".format(workspace_id, role_id) + request_header = self.request_form_header() + + logger.debug( + "Get workspace members for workspace ID -> %s and role ID -> %s; calling -> %s", + str(workspace_id), + str(role_id), + request_url, + ) + + retries = 0 + while True: + response = requests.get( + url=request_url, + headers=request_header, + cookies=self.cookie(), + timeout=None, + ) + if response.ok: + return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + retries += 1 + else: + logger.error( + "Failed to get workspace members; status -> %s; error -> %s", + response.status_code, + response.text, + ) + return None + + # end method definition + + def add_workspace_member( self, workspace_id: int, role_id: int, member_id: int, show_warning: bool = True ) -> dict | None: """Add member to a workspace role. Check that the user/group is not yet a member. @@ -5488,36 +6515,16 @@ def add_member_to_workspace( dict: Workspace Role Membership or None if the request fails. """ - add_member_to_workspace_post_data = {"id": str(member_id)} - - request_url = self.config()[ - "businessWorkspacesUrl" - ] + "/{}/roles/{}/members".format(workspace_id, role_id) - request_header = self.request_form_header() - - logger.info( - "Check if user/group with ID -> %s is already in role with ID -> %s of workspace with ID -> %s; calling -> %s", + logger.debug( + "Check if user/group with ID -> %s is already in role with ID -> %s of workspace with ID -> %s", str(member_id), str(role_id), str(workspace_id), - request_url, ) - response = requests.get( - url=request_url, - headers=request_header, - cookies=self.cookie(), - timeout=None, + workspace_members = self.get_workspace_members( + workspace_id=workspace_id, role_id=role_id ) - if not response.ok: - logger.error( - "Failed to get workspace members; status -> %s; error -> %s", - response.status_code, - response.text, - ) - return None - - workspace_members = self.parse_request_response(response) if self.exist_result_item(workspace_members, "id", member_id): if show_warning: @@ -5529,7 +6536,14 @@ def add_member_to_workspace( ) return workspace_members - logger.info( + add_workspace_member_post_data = {"id": str(member_id)} + + request_url = self.config()[ + "businessWorkspacesUrl" + ] + "/{}/roles/{}/members".format(workspace_id, role_id) + request_header = self.request_form_header() + + logger.debug( "Add user/group with ID -> %s to role with ID -> %s of workspace with ID -> %s; calling -> %s", str(member_id), str(role_id), @@ -5537,30 +6551,36 @@ def add_member_to_workspace( request_url, ) - response = requests.post( - url=request_url, - headers=request_header, - data=add_member_to_workspace_post_data, - cookies=self.cookie(), - timeout=None, - ) - - if response.ok: - return self.parse_request_response(response) - else: - logger.error( - "Failed to add user/group with ID -> %s to role with ID -> %s of workspace with ID -> %s; status -> %s; error -> %s", - str(member_id), - str(role_id), - str(workspace_id), - response.status_code, - response.text, + retries = 0 + while True: + response = requests.post( + url=request_url, + headers=request_header, + data=add_workspace_member_post_data, + cookies=self.cookie(), + timeout=None, ) - return None + if response.ok: + 
return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + retries += 1 + else: + logger.error( + "Failed to add user/group with ID -> %s to role with ID -> %s of workspace with ID -> %s; status -> %s; error -> %s", + str(member_id), + str(role_id), + str(workspace_id), + response.status_code, + response.text, + ) + return None # end method definition - def remove_member_from_workspace( + def remove_workspace_member( self, workspace_id: int, role_id: int, member_id: int, show_warning: bool = True ) -> dict | None: """Remove a member from a workspace role. Check that the user is currently a member. @@ -5574,34 +6594,16 @@ def remove_member_from_workspace( dict: Workspace Role Membership or None if the request fails. """ - request_url = self.config()[ - "businessWorkspacesUrl" - ] + "/{}/roles/{}/members".format(workspace_id, role_id) - request_header = self.request_form_header() - - logger.info( - "Check if user/group with ID -> %s is in role with ID -> %s of workspace with ID -> %s; calling -> %s", + logger.debug( + "Check if user/group with ID -> %s is in role with ID -> %s of workspace with ID -> %s", str(member_id), str(role_id), str(workspace_id), - request_url, ) - response = requests.get( - url=request_url, - headers=request_header, - cookies=self.cookie(), - timeout=None, + workspace_members = self.get_workspace_members( + workspace_id=workspace_id, role_id=role_id ) - if not response.ok: - logger.error( - "Failed to get workspace members; status -> %s; error -> %s", - response.status_code, - response.text, - ) - return None - - workspace_members = self.parse_request_response(response) if not self.exist_result_item(workspace_members, "id", member_id): if show_warning: @@ -5616,8 +6618,9 @@ def remove_member_from_workspace( request_url = self.config()[ "businessWorkspacesUrl" ] + "/{}/roles/{}/members/{}".format(workspace_id, role_id, member_id) + request_header = self.request_form_header() - logger.info( + logger.debug( "Removing user/group with ID -> %s from role with ID -> %s of workspace with ID -> %s; calling -> %s", str(member_id), str(role_id), @@ -5625,25 +6628,65 @@ def remove_member_from_workspace( request_url, ) - response = requests.delete( - url=request_url, - headers=request_header, - cookies=self.cookie(), - timeout=None, + retries = 0 + while True: + response = requests.delete( + url=request_url, + headers=request_header, + cookies=self.cookie(), + timeout=None, + ) + if response.ok: + return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + retries += 1 + else: + logger.error( + "Failed to remove user/group with ID -> %s from role with ID -> %s of workspace with ID -> %s; status -> %s; error -> %s", + str(member_id), + str(role_id), + str(workspace_id), + response.status_code, + response.text, + ) + return None + + # end method definition + + def remove_workspace_members( + self, workspace_id: int, role_id: int, show_warning: bool = True ) -> bool: + """Remove all members from a workspace role.
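+ This is a convenience wrapper: it determines the current members via get_workspace_members() and then calls remove_workspace_member() for each of them.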
+ + Args: + workspace_id (int): ID of the workspace + role_id (int): ID of the role + show_warning (bool, optional): If True, logs a warning if a member is not in the role + Returns: + bool: True if the members were removed, False if the role has no members or they could not be determined. + """ + + workspace_members = self.get_workspace_members( + workspace_id=workspace_id, role_id=role_id ) - if response.ok: - return self.parse_request_response(response) - else: - logger.error( - "Failed to remove user/group with ID -> %s from role with ID -> %s of workspace with ID -> %s; status -> %s; error -> %s", - str(member_id), - str(role_id), - str(workspace_id), - response.status_code, - response.text, + # Get the list of existing workspace_member ids: + workspace_member_ids = self.get_result_values(workspace_members, "id") + if not workspace_member_ids: + return False + + for workspace_member_id in workspace_member_ids: + self.remove_workspace_member( + workspace_id=workspace_id, + role_id=role_id, + member_id=workspace_member_id, + show_warning=show_warning, ) - return None + + return True # end method definition @@ -5680,7 +6723,7 @@ def assign_workspace_permissions( request_header = self.request_form_header() - logger.info( + logger.debug( "Updating Permissions of role with ID -> %s of workspace with ID -> %s with permissions -> %s; calling -> %s", str(role_id), str(workspace_id), @@ -5706,7 +6749,7 @@ def assign_workspace_permissions( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -5760,7 +6803,7 @@ def update_workspace_icon( request_header = self.cookie() - logger.info( + logger.debug( "Update icon for workspace ID -> %s with icon file -> %s; calling -> %s", str(workspace_id), file_path, @@ -5781,7 +6824,7 @@ def update_workspace_icon( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -5839,14 +6882,14 @@ def get_unique_names(self, names: list, subtype: int | None = None) -> dict | No request_header = self.request_form_header() if subtype: - logger.info( + logger.debug( "Get unique names -> %s with subtype -> %s; calling -> %s", str(names), str(subtype), request_url, ) else: - logger.info( + logger.debug( "Get unique names -> %s; calling -> %s", str(names), request_url, @@ -5864,7 +6907,7 @@ def get_unique_names(self, names: list, subtype: int | None = None) -> dict | No return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -5893,18 +6936,20 @@ def create_item( item_description: str = "", url: str = "", original_id: int = 0, + show_error: bool = True, ) -> dict | None: """Create an Extended ECM item. This REST call is somewhat limited. It cannot set favorite (featured item) or hidden item.
It also does not accept owner group information. Args: parent_id (int): Node ID of the parent - item_type (str): Type of the item (e.g. 0 = foler, 140 = URL) + item_type (str): Type of the item (e.g. 0 = folder, 140 = URL) item_name (str): Name of the item item_description (str, optional): Description of the item url (str, optional): Address of the URL item (if it is an URL item type) original_id (int, optional): Node ID of the original (referenced) item. Required if a shortcut item is created + show_error (bool, optional): Log an error if item creation fails. Otherwise log a warning. Returns: dict: Request response of the create item call or None if the REST call has failed. """ @@ -5924,8 +6969,8 @@ def create_item( request_url = self.config()["nodesUrlv2"] request_header = self.request_form_header() - logger.info( - "Create item -> %s (type -> %s) under parent with ID -> %s; calling -> %s", + logger.debug( + "Create item -> '%s' (type -> %s) under parent with ID -> %s; calling -> %s", item_name, item_type, str(parent_id), @@ -5946,16 +6991,32 @@ def create_item( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 - else: - logger.error( - "Failed to create item -> %s; status -> %s; error -> %s", + elif response.status_code == 500 and "already exists" in response.text: + logger.warning( + "Could not create item -> '%s' (it already exists); status -> %s; error -> %s", item_name, response.status_code, response.text, ) + return self.parse_request_response(response) + else: + if show_error: + logger.error( + "Failed to create item -> '%s'; status -> %s; error -> %s", + item_name, + response.status_code, + response.text, + ) + else: + logger.warning( + "Cannot create item -> '%s' (it may exist already); status -> %s; error -> %s", + item_name, + response.status_code, + response.text, + ) return None # end method definition @@ -5963,18 +7024,24 @@ def create_item( def update_item( self, node_id: int, - parent_id: int = 0, - item_name: str = "", - item_description: str = "", + parent_id: int | None = None, + item_name: str | None = None, + item_description: str | None = None, + category_data: dict | None = None, + external_modify_date: str | None = None, + external_create_date: str | None = None, ) -> dict | None: - """Update an Extended ECM item (parent, name, description). Changing the parent ID is - a move operation. If parent ID = 0 the item will not be moved. + """Update an Extended ECM item (parent, name, description, metadata). Changing the parent ID is + a move operation. If parent ID = 0 or None the item will not be moved.
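+ If category_data is provided, the category values are applied via set_category_values() after the main update request.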
Args: node_id (int): ID of the node - parent_id (int, optional): node ID of the new parent (in case of a move operation) - item_name (str, optional): new name of the item - item_description (str, optional): new description of the item + parent_id (int | None, optional): node ID of the new parent (in case of a move operation) + item_name (str | None, optional): new name of the item + item_description (str | None, optional): new description of the item + category_data (dict | None, optional): new category and attribute values + external_create_date (str, optional): date value of the source system in format 2024-06-24 + external_modify_date (str, optional): date value of the source system in format 2024-06-24 Returns: dict: Response of the update item request or None if the REST call has failed. """ @@ -5991,11 +7058,17 @@ def update_item( # this is a move operation update_item_put_data["parent_id"] = parent_id + # Set external dates if provided: + if external_create_date: + update_item_put_data["external_create_date"] = external_create_date + if external_modify_date: + update_item_put_data["external_modify_date"] = external_modify_date + request_url = self.config()["nodesUrlv2"] + "/" + str(node_id) request_header = self.request_form_header() - logger.info( - "Update item -> %s with data -> %s; calling -> %s", + logger.debug( + "Update item -> '%s' with data -> %s; calling -> %s", item_name, str(update_item_put_data), request_url, @@ -6012,10 +7085,21 @@ def update_item( timeout=None, ) if response.ok: - return self.parse_request_response(response) + parsed_response = self.parse_request_response(response) + + if category_data: + for category in category_data: + response = self.set_category_values( + node_id=node_id, + category_id=category, + category_data=category_data[category], + ) + # we want to return the result of the main REST API call: + return parsed_response + # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -6073,7 +7157,7 @@ def get_document_templates(self, parent_id: int): ) request_header = self.request_form_header() - logger.info( + logger.debug( "Get document templates for target location -> %s (parent ID); calling -> %s", str(parent_id), request_url, @@ -6092,7 +7176,7 @@ def get_document_templates(self, parent_id: int): return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -6111,7 +7195,7 @@ def create_document_from_template( template_id: int, parent_id: int, classification_id: int, - category_data: dict, + category_data: dict | None, doc_name: str, doc_desciption: str = "", ) -> dict | None: @@ -6154,8 +7238,8 @@ def create_document_from_template( request_url = self.config()["doctemplatesUrl"] request_header = self.request_form_header() - logger.info( - "Create document -> %s from template with ID -> %s in target location -> %s (parent ID) with classification ID -> %s; calling -> %s", + logger.debug( + "Create document -> '%s' from template with ID -> %s in target location with ID -> %s with classification ID -> %s; calling -> %s",
doc_name, str(template_id), str(parent_id), @@ -6179,12 +7263,12 @@ def create_document_from_template( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to create document -> %s; status -> %s; error -> %s", + "Failed to create document -> '%s'; status -> %s; error -> %s", doc_name, response.status_code, response.text, @@ -6193,7 +7277,168 @@ def create_document_from_template( # end method definition - def get_web_report_parameters(self, nickname: str): + def create_wiki( + self, + parent_id: int, + name: str, + description: str = "", + show_error: bool = True, + ) -> dict | None: + """Create an Extended ECM Wiki. + + Args: + parent_id (int): Node ID of the parent + name (str): Name of the wiki item + description (str, optional): Description of the wiki item + show_error (bool, optional): Log an error if wiki creation fails. Otherwise log a warning. + Returns: + dict: Request response of the create wiki call or None if the REST call has failed. + """ + + create_item_post_data = { + "parent_id": parent_id, + "type": 5573, + "name": name, + "description": description, + } + + request_url = self.config()["nodesUrlv2"] + request_header = self.request_form_header() + + logger.debug( + "Create wiki -> '%s' under parent with ID -> %s; calling -> %s", + name, + str(parent_id), + request_url, + ) + + retries = 0 + while True: + # This REST API needs a special treatment: we encapsulate the payload as JSON into a "body" tag. + response = requests.post( + url=request_url, + data={"body": json.dumps(create_item_post_data)}, + headers=request_header, + cookies=self.cookie(), + timeout=None, + ) + if response.ok: + return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + retries += 1 + elif response.status_code == 500 and "already exists" in response.text: + logger.warning( + "Could not create wiki -> '%s' (it already exists); status -> %s; error -> %s", + name, + response.status_code, + response.text, + ) + return self.parse_request_response(response) + else: + if show_error: + logger.error( + "Failed to create wiki -> '%s'; status -> %s; error -> %s", + name, + response.status_code, + response.text, + ) + else: + logger.warning( + "Cannot create wiki -> '%s' (it may exist already); status -> %s; error -> %s", + name, + response.status_code, + response.text, + ) return None + + # end method definition + + def create_wiki_page( + self, + wiki_id: int, + name: str, + content: str = "", + description: str = "", + show_error: bool = True, + ) -> dict | None: + """Create an Extended ECM wiki page. + + Args: + wiki_id (int): Node ID of the wiki + name (str): Name of the wiki page + content (str, optional): Content of the page (typically HTML) + description (str, optional): Description of the wiki page + show_error (bool, optional): Log an error if wiki page creation fails. Otherwise log a warning. + Returns: + dict: Request response of the create wiki page call or None if the REST call has failed.
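+ + Example (illustrative sketch - the node IDs are hypothetical): + >>> wiki = self.create_wiki(parent_id=2000, name="Project Wiki") + >>> page = self.create_wiki_page(wiki_id=12345, name="Home", content="<p>Welcome!</p>")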
+ """ + + create_item_post_data = { + "parent_id": wiki_id, + "type": 5574, + "name": name, + "description": description, + "TextField": content, + } + + request_url = self.config()["nodesUrl"] + # Header needs to just include the cookie: + request_header = self.cookie() + + logger.debug( + "Create wiki page -> '%s' in wiki with ID -> %s; calling -> %s", + name, + str(wiki_id), + request_url, + ) + + retries = 0 + while True: + # This REST API needs a special treatment: we encapsulate the payload as JSON into a "body" tag. + response = requests.post( + url=request_url, + # data={"body": json.dumps(create_item_post_data)}, + data=create_item_post_data, + headers=request_header, + timeout=None, + ) + if response.ok: + return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + retries += 1 + elif response.status_code == 500 and "already exists" in response.text: + logger.warning( + "Could not create wiki page -> '%s' (it already exists); status -> %s; error -> %s", + name, + response.status_code, + response.text, + ) + return self.parse_request_response(response) + else: + if show_error: + logger.error( + "Failed to create wiki page -> %s; status -> %s; error -> %s", + name, + response.status_code, + response.text, + ) + else: + logger.warning( + "Cannot create wiki page -> %s (it may exist already); status -> %s; error -> %s", + name, + response.status_code, + response.text, + ) + return None + + # end method definition + + def get_web_report_parameters(self, nickname: str) -> list | None: """Get parameters of a Web Report in Extended ECM. 
These are defined on the Web Report node (Properties --> Parameters) @@ -6218,8 +7463,8 @@ def get_web_report_parameters(self, nickname: str): request_url = self.config()["webReportsUrl"] + "/" + nickname + "/parameters" request_header = self.request_form_header() - logger.info( - "Retrieving parameters of Web Report with nickname -> %s; calling -> %s", + logger.debug( + "Retrieving parameters of Web Report with nickname -> '%s'; calling -> %s", nickname, request_url, ) @@ -6240,12 +7485,12 @@ def get_web_report_parameters(self, nickname: str): return result_dict["data"] # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to retrieve parameters of Web Report with nickname -> %s; status -> %s; error -> %s", + "Failed to retrieve parameters of Web Report with nickname -> '%s'; status -> %s; error -> %s", nickname, response.status_code, response.text, @@ -6273,7 +7518,7 @@ def run_web_report( request_url = self.config()["webReportsUrl"] + "/" + nickname request_header = self.request_form_header() - logger.info( + logger.debug( "Running Web Report with nickname -> %s; calling -> %s", nickname, request_url, @@ -6292,7 +7537,7 @@ def run_web_report( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -6320,7 +7565,7 @@ def install_cs_application(self, application_name: str) -> dict | None: request_url = self.config()["csApplicationsUrl"] + "/install" request_header = self.request_form_header() - logger.info( + logger.debug( "Install CS Application -> %s; calling -> %s", application_name, request_url ) @@ -6337,12 +7582,12 @@ def install_cs_application(self, application_name: str) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to install CS Application -> %s; status -> %s; error -> %s", + "Failed to install CS Application -> '%s'; status -> %s; error -> %s", application_name, response.status_code, response.text, @@ -6378,8 +7623,8 @@ def assign_item_to_user_group( request_header = self.request_form_header() - logger.info( - "Assign item with ID -> %s to assignees -> %s (subject -> %s); calling -> %s", + logger.debug( + "Assign item with ID -> %s to assignees -> %s (subject -> '%s'); calling -> %s", str(node_id), str(assignees), subject, @@ -6400,7 +7645,7 @@ def assign_item_to_user_group( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -6555,8 +7800,8 @@ def 
assign_permission( request_header = self.request_form_header() - logger.info( - "Assign permissions -> %s to item with ID -> %s; assignee type -> %s; calling -> %s", + logger.debug( + "Assign permissions -> %s to item with ID -> %s; assignee type -> '%s'; calling -> %s", str(permissions), str(node_id), assignee_type, @@ -6589,7 +7834,7 @@ def assign_permission( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -6604,14 +7849,119 @@ def assign_permission( # end method definition - def get_node_categories(self, node_id: int, metadata: bool = True): + def get_node_categories(self, node_id: int, metadata: bool = True) -> dict | None: """Get categories assigned to a node. Args: node_id (int): ID of the node to get the categories for. metadata (bool, optional): expand the attribute definitions of the category. Default is True. Returns: - dict: category response or None if the call to the REST API fails. + dict | None: category response or None if the call to the REST API fails. + + Example: + [ + { + 'data': { + 'categories': { + '16878_25': 'Customer', + '16878_28': '50031', + '16878_29': 'Global Trade AG', + '16878_30': 'Gutleutstraße 53', + '16878_31': 'Germany', + '16878_32': '60329', + '16878_33': ['1000'], + '16878_34': 'Frankfurt', + '16878_37': ['Retail'], + '16878_38': '0000050031', + '16878_39_1_40': '0000001096', + '16878_39_1_41': 'Heinz Hart', + '16878_39_1_42': 'Purchasing', + '16878_39_1_43': 'Purchasing Manager', + '16878_39_1_44': '+49695325410', + '16878_39_1_45': '+49695325499', + '16878_39_1_46': 'Heinz.Hart@GlobalTrade.com', + '16878_39_1_47': 'B', + '16878_39_1_48': '4', + ... + } + }, + 'metadata': { + '16878': { + 'allow_undefined': False, + 'bulk_shared': False, + 'default_value': None, + 'description': None, + 'hidden': False, + 'key': '16878', + 'key_value_pairs': False, + 'multi_value': False, + 'multi_value_length_default': 1, + 'multi_value_length_fixed': True, + 'multi_value_length_max': 1, + 'multi_value_max_length': None, + 'multi_value_min_length': None, + 'multi_value_unique': False, + 'name': 'Customer', + 'next_id': 83, + 'persona': 'category', + 'read_only': True, + 'required': False, + ... + }, + '16878_25': { + 'allow_undefined': False, + 'bulk_shared': False, + 'default_value': None, + 'description': None, + 'hidden': False, + 'key': '16878_25', + 'key_value_pairs': False, + 'max_length': None, + 'min_length': None, + 'multi_select': False, + 'multi_value': False, + 'multi_value_length_default': 1, + 'multi_value_length_fixed': True, + 'multi_value_length_max': 1, + 'multi_value_max_length': None, + 'multi_value_min_length': None, + 'multi_value_unique': False, + 'multiline': False, + 'multilingual': False, + ... + }, + '16878_28': { + 'allow_undefined': False, + 'bulk_shared': False, + 'default_value': None, + 'description': None, + 'hidden': False, + 'key': '16878_28', + 'key_value_pairs': False, + 'max_length': 10, + 'min_length': None, + 'multi_select': False, + 'multi_value': False, + 'multi_value_length_default': 1, + 'multi_value_length_fixed': True, + 'multi_value_length_max': 1, + 'multi_value_max_length': None, + 'multi_value_min_length': None, + 'multi_value_unique': False, + 'multiline': False, + 'multilingual': False, + ... + }, + ... 
+ } + 'metadata_map': { + 'categories': {'16878': ['16878_2', '16878_3', '16878_4', '16878_5', '16878_6', '16878_7', '16878_8']} + } + 'metadata_order': { + 'categories': ['16878'] + } + } + ] """ request_url = self.config()["nodesUrlv2"] + "/" + str(node_id) + "/categories" @@ -6619,7 +7969,7 @@ def get_node_categories(self, node_id: int, metadata: bool = True): request_url += "?metadata" request_header = self.request_form_header() - logger.info( + logger.debug( "Get categories of node with ID -> %s; calling -> %s", str(node_id), request_url, @@ -6637,7 +7987,7 @@ def get_node_categories(self, node_id: int, metadata: bool = True): return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -6651,7 +8001,9 @@ def get_node_categories(self, node_id: int, metadata: bool = True): # end method definition - def get_node_category(self, node_id: int, category_id: int, metadata: bool = True): + def get_node_category( + self, node_id: int, category_id: int, metadata: bool = True + ) -> dict | None: """Get a specific category assigned to a node. Args: @@ -6673,7 +8025,7 @@ def get_node_category(self, node_id: int, category_id: int, metadata: bool = Tru request_url += "?metadata" request_header = self.request_form_header() - logger.info( + logger.debug( "Get category with ID -> %s on node with ID -> %s; calling -> %s", str(category_id), str(node_id), @@ -6692,7 +8044,7 @@ def get_node_category(self, node_id: int, category_id: int, metadata: bool = Tru return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -6731,10 +8083,35 @@ def get_node_category_ids(self, node_id: int) -> list: # end method definition + def get_node_category_names(self, node_id: int) -> list | None: + """Get list of all category names that are assigned to the node. + + Args: + node_id (int): ID of the node to get the categories for. + Returns: + list: list of category names (all categories assigned to the node) + """ + + categories = self.get_node_categories(node_id=node_id, metadata=True) + if not categories or not categories["results"]: + return None + + # List comprehension to extract category names safely + return [ + next(iter(category["metadata"]["categories"].values()), {}).get("name") + for category in categories["results"] + ] + + # end method definition + def get_node_category_definition( - self, node_id: int, category_name: str + self, + node_id: int, + category_name: str, ) -> tuple[int, dict]: - """Get category definition (category id and attribute IDs and types) + """Get category definition (category id and attribute names, IDs and types). + This is a convenience method that wraps the complex return value + of get_node_categories() in an easier-to-parse structure. Args: node_id (int): node to read the category definition from @@ -6743,47 +8120,105 @@ def get_node_category_definition( category_name (str): name of the category Returns: int: category ID - dict: keys are the attribute names.
values are sub-dictionaries with the id and type of the attribute. + dict: dict keys are the attribute names. dict values are sub-dictionaries with the id and type of the attribute. + For set attributes the key is constructed as <set name>:<attribute name>. + Set attributes also include an additional value "set_id". Example response: { - 'Status': { - 'id': '12532_2', - 'type': 'String' - }, - 'Legal Approval': { - 'id': '12532_3', - 'type': 'user' - }, - ... + 'Status': {'id': '16892_25', 'type': 'String'}, + 'Customer Number': {'id': '16892_28', 'type': 'String'}, + 'Name': {'id': '16892_29', 'type': 'String'}, + 'Street': {'id': '16892_30', 'type': 'String'}, + 'Country': {'id': '16892_31', 'type': 'String'}, + 'Postal code': {'id': '16892_32', 'type': 'String'}, + 'Sales organisation': {'id': '16892_33', 'type': 'String'}, + 'City': {'id': '16892_34', 'type': 'String'}, + 'Industry': {'id': '16892_37', 'type': 'String'}, + 'Object Key': {'id': '16892_38', 'type': 'String'}, + 'Contacts': {'id': '16892_39', 'type': 'set'}, + 'Contacts:BP No': {'id': '16892_39_x_40', 'type': 'String', 'set_id': '16892_39'}, + 'Contacts:Name': {'id': '16892_39_x_41', 'type': 'String', 'set_id': '16892_39'}, + 'Contacts:Department': {'id': '16892_39_x_42', 'type': 'String', 'set_id': '16892_39'}, + 'Contacts:Function': {'id': '16892_39_x_43', 'type': 'String', 'set_id': '16892_39'}, + 'Contacts:Phone': {'id': '16892_39_x_44', 'type': 'String', 'set_id': '16892_39'}, + 'Contacts:Fax': {'id': '16892_39_x_45', 'type': 'String', 'set_id': '16892_39'}, + 'Contacts:Email': {'id': '16892_39_x_46', 'type': 'String', 'set_id': '16892_39'}, + 'Contacts:Building': {'id': '16892_39_x_47', 'type': 'String', 'set_id': '16892_39'}, + 'Contacts:Floor': {'id': '16892_39_x_48', 'type': 'String', 'set_id': '16892_39'}, + 'Contacts:Room': {'id': '16892_39_x_49', 'type': 'String', 'set_id': '16892_39'}, + 'Contacts:Comments': {'id': '16892_39_x_50', 'type': 'String', 'set_id': '16892_39'}, + 'Contacts:Valid from': {'id': '16892_39_x_51', 'type': 'Date', 'set_id': '16892_39'}, + 'Contacts:Valid to': {'id': '16892_39_x_52', 'type': 'Date', 'set_id': '16892_39'}, + 'Sales Areas': {'id': '16892_53', 'type': 'set'}, + 'Sales Areas:Sales Organisation': {'id': '16892_53_x_54', 'type': 'String', 'set_id': '16892_53'}, + 'Sales Areas:Distribution Channel': {'id': '16892_53_x_55', 'type': 'String', 'set_id': '16892_53'}, + 'Sales Areas:Division': {'id': '16892_53_x_56', 'type': 'String', 'set_id': '16892_53'}, + 'Rating': {'id': '16892_57', 'type': 'set'}, + 'Rating:Credit Standing': {'id': '16892_57_x_58', 'type': 'String', 'set_id': '16892_57'}, + 'Rating:Date': {'id': '16892_57_x_59', 'type': 'Date', 'set_id': '16892_57'}, + 'Rating:Status': {'id': '16892_57_x_60', 'type': 'String', 'set_id': '16892_57'}, + 'Rating:add.
Information': {'id': '16892_57_x_61', 'type': 'String', 'set_id': '16892_57'}, + 'Rating:Institute': {'id': '16892_57_x_62', 'type': 'String', 'set_id': '16892_57'}, + 'Rating:Rating': {'id': '16892_57_x_63', 'type': 'String', 'set_id': '16892_57'}, + 'Locations': {'id': '16892_75', 'type': 'set'}, + 'Locations:Type': {'id': '16892_75_x_76', 'type': 'String', 'set_id': '16892_75'}, + 'Locations:Street': {'id': '16892_75_x_77', 'type': 'String', 'set_id': '16892_75'}, + 'Locations:City': {'id': '16892_75_x_78', 'type': 'String', 'set_id': '16892_75'}, + 'Locations:Country': {'id': '16892_75_x_79', 'type': 'String', 'set_id': '16892_75'}, + 'Locations:Postal code': {'id': '16892_75_x_80', 'type': 'String', 'set_id': '16892_75'}, + 'Locations:Valid from': {'id': '16892_75_x_81', 'type': 'Date', 'set_id': '16892_75'}, + 'Locations:Valid to': {'id': '16892_75_x_82', 'type': 'Date', 'set_id': '16892_75'} } """ attribute_definitions = {} - cat_id = -1 response = self.get_node_categories(node_id) if response and response["results"]: for categories in response["results"]: + # get all metadata IDs keys = categories["metadata"]["categories"].keys() + # There's one without an underscore - that's the ID of the category itself: cat_id = next((key for key in keys if "_" not in key), -1) cat_name = categories["metadata"]["categories"][cat_id]["name"] + # Check we have the category we are looking for: if cat_name != category_name: + # Wrong category - not matching - go to next continue for att_id in categories["metadata"]["categories"]: if not "_" in att_id: + # We skip the element representing the category itself: continue att_name = categories["metadata"]["categories"][att_id]["name"] - if categories["metadata"]["categories"][att_id]["persona"]: - att_type = categories["metadata"]["categories"][att_id][ - "persona" - ] + att_persona = categories["metadata"]["categories"][att_id][ + "persona" + ] + # Persona can be either "set" or "categoryattribute". + # If the persona is "set" we store the set information: + if att_persona == "set": + # We save the set name and ID for the attributes that follow: + set_name = att_name + set_id = att_id + # Attribute types can be "String", ... + # For the set attribute itself the type_name = "Assoc" + att_type = categories["metadata"]["categories"][att_id]["type_name"] + if "_x_" in att_id: # this is not true for the set attribute itself + # set_name and set_id are still set to the values of the preceding + # for-loop iteration! + attribute_definitions[set_name + ":" + att_name] = { + "id": att_id, + "type": att_type, + "set_id": set_id, + } else: - att_type = categories["metadata"]["categories"][att_id][ - "type_name" - ] - attribute_definitions[att_name] = {"id": att_id, "type": att_type} - return cat_id, attribute_definitions + attribute_definitions[att_name] = { + "id": att_id, + "type": att_type, + } + + return cat_id, attribute_definitions + return -1, {} # end method definition @@ -6827,7 +8262,7 @@ def assign_category( existing_category_ids = self.get_node_category_ids(node_id) if not category_id in existing_category_ids: - logger.info( + logger.debug( "Category with ID -> %s is not yet assigned to node ID -> %s.
Assigning it now...", str(category_id), str(node_id), ) category_post_data = { "category_id": category_id, } - logger.info( + logger.debug( "Assign category with ID -> %s to item with ID -> %s; calling -> %s", str(category_id), str(node_id), @@ -6856,7 +8291,7 @@ def assign_category( break # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -6897,7 +8332,7 @@ def assign_category( break # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -6910,47 +8345,151 @@ def assign_category( ) return False - # - # 3. Apply to sub-items - # + # + # 3. Apply to sub-items + # + + if apply_to_sub_items: + request_url_apply_sub_items = request_url + "/apply" + + category_post_data = { + "categories": [{"id": category_id, "action": apply_action}], + "add_version": add_version, + "clear_existing_categories": clear_existing_categories, + } + + retries = 0 + while True: + # we need to wrap the body of this POST call into a "body" + # tag. This is documented wrongly on developer.opentext.com + response = requests.post( + url=request_url_apply_sub_items, + data={"body": json.dumps(category_post_data)}, + headers=request_header, + cookies=self.cookie(), + timeout=None, + ) + if response.ok: + break + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + retries += 1 + else: + logger.error( + "Failed to apply category with ID -> %s to sub-items of node with ID -> %s; status -> %s; error -> %s", + str(category_id), + str(node_id), + response.status_code, + response.text, + ) + return False + return True + + # end method definition + + def get_category_value_by_name( + self, + node_id: int, + category_name: str, + attribute_name: str, + set_name: str | None = None, + set_row: int = 1, + ) -> str | list | None: + """Look up the value of an attribute if category name, set name and attribute name + are known. + + Args: + node_id (int): ID of the node the category is assigned to. + category_name (str): Name of the category. + attribute_name (str): Name of the attribute. + set_name (str | None, optional): Name of the set. Defaults to None. + set_row (int, optional): Index of the row (first row = 1!). Defaults to 1. + + Returns: + str | list | None: The value of the attribute. If it is a multi-value attribute a list will be returned.
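+ + Example (illustrative - node ID, names and values follow the sample data shown for get_node_categories() and are hypothetical): + >>> self.get_category_value_by_name(node_id=16878, category_name="Customer", attribute_name="Name") + 'Global Trade AG' + >>> self.get_category_value_by_name(node_id=16878, category_name="Customer", attribute_name="Email", set_name="Contacts", set_row=1) + 'Heinz.Hart@GlobalTrade.com'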
+ """ + + (_, cat_definitions) = self.get_node_category_definition(node_id, category_name) + if not cat_definitions: + logger.warning("No categories are assigned to node -> %s", str(node_id)) + return None + + if set_name: + lookup = set_name + ":" + attribute_name + else: + lookup = attribute_name + + if not lookup in cat_definitions: + logger.error("Cannot find attribute -> '%s' in category -> '%s'") + + att_def = cat_definitions[lookup] + att_id = att_def["id"] + if "_x_" in att_id: + att_id = att_id.replace("_x_", "_" + str(set_row) + "_") + + value = None + + response = self.get_node_categories(node_id=node_id, metadata=False) + categories = response["results"] + for category in categories: + if att_id in category["data"]["categories"]: + value = category["data"]["categories"][att_id] + break + + return value + + # end method definition + + def get_category_value( + self, + node_id: int, + category_id: int, + attribute_id: int, + set_id: int | None = None, + set_row: int = 1, + ) -> str | list | None: + """Lookup the value of an attribute if category ID, set ID and attribute ID + are known. If you only have the names use get_category_value_by_name() + + Args: + node_id (int): Node ID the category is assigned to + category_id (int): ID of the category + attribute_id (int): ID of the attribute (the pure ID without underscores) + set_id (int, optional): ID of the set. Defaults to None. + set_row (int, optional): Index of the row (first row = 1!). Defaults to 1. + + Returns: + str | list | None: The value of the attribute. If it is a multi-value attribute a list will be returned. + """ + + if set_id and set_row: + att_id = ( + str(category_id) + + "_" + + str(set_id) + + "_" + + str(set_row) + + "_" + + str(attribute_id) + ) + elif set_id: + att_id = str(category_id) + "_" + str(set_id) + "_" + str(attribute_id) + else: + att_id = str(category_id) + "_" + str(attribute_id) + + response = self.get_node_categories(node_id=node_id, metadata=False) + categories = response["results"] - if apply_to_sub_items: - request_url_apply_sub_items = request_url + "/apply" + value = None - category_post_data = { - "categories": [{"id": category_id, "action": apply_action}], - "add_version": add_version, - "clear_existing_categories": clear_existing_categories, - } + for category in categories: + if att_id in category["data"]["categories"]: + value = category["data"]["categories"][att_id] + break - retries = 0 - while True: - # we need to wrap the body of this POST call into a "body" - # tag. 
This is documented worngly on developer.opentext.com - response = requests.post( - url=request_url_apply_sub_items, - data={"body": json.dumps(category_post_data)}, - headers=request_header, - cookies=self.cookie(), - timeout=None, - ) - if response.ok: - break - # Check if Session has expired - then re-authenticate and try once more - elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") - self.authenticate(revalidate=True) - retries += 1 - else: - logger.error( - "Failed to apply category with ID -> %s to sub-items of node with ID -> %s; status -> %s; error -> %s", - str(category_id), - str(node_id), - response.status_code, - response.text, - ) - return False - return True + return value # end method definition @@ -6970,7 +8509,7 @@ def set_category_value( node_id (int): ID of the node value (multi-typed): value to be set - can be string or list of strings (for multi-value attributes) category_id (int):ID of the category object - attribute_id (int): ID of the attribute + attribute_id (int): ID of the attribute, this should not include the category ID nor an underscore but the plain attribute ID like '10' set_id (int, optional): ID of the set. Defaults to 0. set_row (int, optional): Row of . Defaults to 1. @@ -6988,8 +8527,8 @@ def set_category_value( request_header = self.request_form_header() if set_id: - logger.info( - "Assign value -> %s to category with ID -> %s, set ID -> %s, row -> %s, attribute ID -> %s on node with ID -> %s; calling -> %s", + logger.debug( + "Assign value -> '%s' to category with ID -> %s, set ID -> %s, row -> %s, attribute ID -> %s on node with ID -> %s; calling -> %s", str(value), str(category_id), str(set_id), @@ -7003,8 +8542,8 @@ def set_category_value( "{}_{}_{}_{}".format(category_id, set_id, set_row, attribute_id): value, } else: - logger.info( - "Assign value -> %s to category ID -> %s, attribute ID -> %s on node with ID -> %s; calling -> %s", + logger.debug( + "Assign value -> '%s' to category ID -> %s, attribute ID -> %s on node with ID -> %s; calling -> %s", str(value), str(category_id), str(attribute_id), @@ -7029,12 +8568,12 @@ def set_category_value( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to set value -> %s for category with ID -> %s, attribute ID -> %s on node ID -> %s; status -> %s; error -> %s", + "Failed to set value -> '%s' for category with ID -> %s, attribute ID -> %s on node ID -> %s; status -> %s; error -> %s", str(value), str(category_id), str(attribute_id), @@ -7046,10 +8585,162 @@ def set_category_value( # end method definition + def set_category_values( + self, + node_id: int, + category_id: int, + category_data: dict, + ) -> dict | None: + """Set values of a category. Categories and have sets (groupings), multi-line sets (matrix), + and multi-value attributes (list of values). This method supports all variants. 
+ + Args: + node_id (int): ID of the node + category_id (int): ID of the category object + category_data (dict): dictionary with category attributes and values + + Returns: + dict: REST API response or None if the call fails + """ + + request_url = ( + self.config()["nodesUrlv2"] + + "/" + + str(node_id) + + "/categories/" + + str(category_id) + ) + request_header = self.request_form_header() + + category_put_data = {"category_id": category_id} + category_put_data.update(category_data) + + logger.debug( + "Set values -> %s for category ID -> %s on node -> %s...", + category_data, + category_id, + node_id, + ) + + retries = 0 + while True: + response = requests.put( + url=request_url, + data=category_put_data, + headers=request_header, + cookies=self.cookie(), + timeout=None, + ) + if response.ok: + return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + retries += 1 + else: + logger.error( + "Failed to set values -> '%s' for category with ID -> %s on node ID -> %s; status -> %s; error -> %s", + str(category_data), + str(category_id), + str(node_id), + response.status_code, + response.text, + ) + return None + + # end method definition + + def set_category_inheritance( + self, node_id: int, category_id: int, enable: bool = True + ) -> dict | None: + """Control whether a container item (e.g. a folder or workspace) inherits + categories to sub-items. + + Args: + node_id (int): Node ID of the container item. + category_id (int): Node ID of the category item. + enable (bool): Whether the inheritance should be enabled (True) or disabled (False) + + Returns: + dict | None: Response of the request or None in case of an error.
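+ + Example (illustrative - the IDs are hypothetical): + >>> self.set_category_inheritance(node_id=2000, category_id=16878, enable=True)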
+ """ + + request_url = ( + self.config()["nodesUrlv2"] + + "/" + + str(node_id) + + "/categories/" + + str(category_id) + + "/inheritance" + ) + request_header = self.request_form_header() + + if enable: + logger.debug( + "Enable category inheritance for node with ID -> %s and category ID -> %s; calling -> %s", + str(node_id), + str(category_id), + request_url, + ) + else: + logger.debug( + "Disable category inheritance of node with ID -> %s and category ID -> %s; calling -> %s", + str(node_id), + str(category_id), + request_url, + ) + + retries = 0 + while True: + if enable: + response = requests.post( + url=request_url, + headers=request_header, + cookies=self.cookie(), + timeout=None, + ) + else: + response = requests.delete( + url=request_url, + headers=request_header, + cookies=self.cookie(), + timeout=None, + ) + if response.ok: + return self.parse_request_response(response) + # Check if Session has expired - then re-authenticate and try once more + elif response.status_code == 401 and retries == 0: + logger.debug("Session has expired - try to re-authenticate...") + self.authenticate(revalidate=True) + retries += 1 + else: + if enable: + logger.error( + "Failed to enable categories inheritance for node ID -> %s and category ID -> %s; status -> %s; error -> %s", + str(node_id), + str(category_id), + response.status_code, + response.text, + ) + else: + logger.error( + "Failed to disable categories inheritance for node ID -> %s and category ID -> %s; status -> %s; error -> %s", + str(node_id), + str(category_id), + response.status_code, + response.text, + ) + return None + + # end method definition + def assign_classification( self, node_id: int, classifications: list, apply_to_sub_items: bool = False ) -> dict | None: """Assign one or multiple classifications to an Extended ECM item + Args: node_id (int): node ID of the Extended ECM item classifications (list): list of classification item IDs @@ -7077,7 +8768,7 @@ def assign_classification( request_header = self.request_form_header() - logger.info( + logger.debug( "Assign classifications with IDs -> %s to item with ID -> %s; calling -> %s", str(classifications), str(node_id), @@ -7097,7 +8788,7 @@ def assign_classification( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -7138,7 +8829,7 @@ def assign_rm_classification( request_header = self.request_form_header() - logger.info( + logger.debug( "Assign RM classifications with ID -> %s to item with ID -> %s; calling -> %s", str(rm_classification), str(node_id), @@ -7158,7 +8849,7 @@ def assign_rm_classification( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -7175,6 +8866,7 @@ def assign_rm_classification( def register_workspace_template(self, node_id: int) -> dict | None: """Register a workspace template as project template for Extended ECM for Engineering + Args: node_id (int): node ID of the Extended ECM workspace template Returns: @@ -7187,7 +8879,7 @@ def 
register_workspace_template(self, node_id: int) -> dict | None: request_header = self.request_form_header() - logger.info( + logger.debug( "Register workspace template with ID -> %s; calling -> %s", str(node_id), request_url, @@ -7206,7 +8898,7 @@ def register_workspace_template(self, node_id: int) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -7271,7 +8963,7 @@ def get_records_management_rsis(self, limit: int = 100) -> list | None: request_url = self.config()["rsisUrl"] + "?limit=" + str(limit) request_header = self.request_form_header() - logger.info("Get list of Records Management RSIs; calling -> %s", request_url) + logger.debug("Get list of Records Management RSIs; calling -> %s", request_url) retries = 0 while True: @@ -7286,7 +8978,7 @@ def get_records_management_rsis(self, limit: int = 100) -> list | None: return rsi_dict["results"]["data"]["rsis"] # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -7313,7 +9005,7 @@ def get_records_management_codes(self) -> dict | None: request_url = self.config()["recordsManagementUrlv2"] + "/rmcodes" request_header = self.request_form_header() - logger.info("Get list of Records Management codes; calling -> %s", request_url) + logger.debug("Get list of Records Management codes; calling -> %s", request_url) retries = 0 while True: @@ -7328,7 +9020,7 @@ def get_records_management_codes(self) -> dict | None: return rm_codes_dict["results"]["data"] # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -7359,7 +9051,7 @@ def update_records_management_codes(self, rm_codes: dict) -> dict | None: request_url = self.config()["recordsManagementUrl"] + "/rmcodes" request_header = self.request_form_header() - logger.info( + logger.debug( "Update Records Management codes -> %s; calling -> %s", str(rm_codes), request_url, @@ -7379,7 +9071,7 @@ def update_records_management_codes(self, rm_codes: dict) -> dict | None: return rm_codes_dict["results"]["data"] # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -7434,7 +9126,7 @@ def create_records_management_rsi( request_header = self.request_form_header() - logger.info( + logger.debug( "Create Records Management RSI -> %s; calling -> %s", name, request_url ) @@ -7451,7 +9143,7 @@ def create_records_management_rsi( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - 
try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -7566,7 +9258,7 @@ def create_records_management_rsi_schedule( request_header = self.request_form_header() - logger.info( + logger.debug( "Create Records Management RSI Schedule -> %s for RSI -> %s; calling -> %s", stage, str(rsi_id), @@ -7586,7 +9278,7 @@ def create_records_management_rsi_schedule( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -7644,7 +9336,7 @@ def create_records_management_hold( request_header = self.request_form_header() - logger.info( + logger.debug( "Create Records Management Hold -> %s; calling -> %s", name, request_url ) @@ -7661,7 +9353,7 @@ def create_records_management_hold( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -7714,7 +9406,7 @@ def get_records_management_holds(self) -> dict | None: request_header = self.request_form_header() - logger.info("Get list of Records Management Holds; calling -> %s", request_url) + logger.debug("Get list of Records Management Holds; calling -> %s", request_url) retries = 0 while True: @@ -7728,7 +9420,7 @@ def get_records_management_holds(self) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -7756,8 +9448,8 @@ def import_records_management_settings(self, file_path: str) -> bool: self.cookie() ) # for some reason we have to omit the other header parts here - otherwise we get a 400 response - logger.info( - "Importing Records Management Settings from file -> %s; calling -> %s", + logger.debug( + "Importing Records Management Settings from file -> '%s'; calling -> %s", file_path, request_url, ) @@ -7765,7 +9457,7 @@ def import_records_management_settings(self, file_path: str) -> bool: filename = os.path.basename(file_path) if not os.path.exists(file_path): logger.error( - "The file -> %s does not exist in path -> %s!", + "The file -> '%s' does not exist in path -> '%s'!", filename, os.path.dirname(file_path), ) @@ -7787,12 +9479,12 @@ def import_records_management_settings(self, file_path: str) -> bool: return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to import Records Management Settings from file -> %s; status -> %s; error -> %s", + "Failed to import Records Management Settings from file -> '%s'; status -> %s; error -> %s", file_path, 
response.status_code, response.text, @@ -7805,6 +9497,7 @@ def import_records_management_codes( self, file_path: str, update_existing_codes: bool = True ) -> bool: """Import RM Codes from a file that is uploaded from the python pod + Args: file_path (str): path + filename of settings file in Python container filesystem update_existing_codes (bool): Flag that controls whether existing table maintenance codes @@ -7819,8 +9512,8 @@ def import_records_management_codes( self.cookie() ) # for some reason we have to omit the other header parts here - otherwise we get a 400 response - logger.info( - "Importing Records Management Codes from file -> %s; calling -> %s", + logger.debug( + "Importing Records Management Codes from file -> '%s'; calling -> %s", file_path, request_url, ) @@ -7830,7 +9523,7 @@ def import_records_management_codes( filename = os.path.basename(file_path) if not os.path.exists(file_path): logger.error( - "The file -> %s does not exist in path -> %s!", + "The file -> '%s' does not exist in path -> '%s'!", filename, os.path.dirname(file_path), ) @@ -7853,12 +9546,12 @@ def import_records_management_codes( return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to import Records Management Codes from file -> %s; status -> %s; error -> %s", + "Failed to import Records Management Codes from file -> '%s'; status -> %s; error -> %s", file_path, response.status_code, response.text, @@ -7874,6 +9567,7 @@ def import_records_management_rsis( delete_schedules: bool = False, ) -> bool: """Import RM RSIs from a config file that is uploaded from the Python pod + Args: file_path (str): path + filename of config file in Python container filesystem update_existing_rsis (bool, optional): whether or not existing RSIs should be updated (or ignored) @@ -7888,8 +9582,8 @@ def import_records_management_rsis( self.cookie() ) # for some reason we have to omit the other header parts here - otherwise we get a 400 response - logger.info( - "Importing Records Management RSIs from file -> %s; calling -> %s", + logger.debug( + "Importing Records Management RSIs from file -> '%s'; calling -> %s", file_path, request_url, ) @@ -7902,7 +9596,7 @@ def import_records_management_rsis( filename = os.path.basename(file_path) if not os.path.exists(file_path): logger.error( - "The file -> %s does not exist in path -> %s!", + "The file -> '%s' does not exist in path -> '%s'!", filename, os.path.dirname(file_path), ) @@ -7925,12 +9619,12 @@ def import_records_management_rsis( return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to import Records Management RSIs from file -> %s; status -> %s; error -> %s", + "Failed to import Records Management RSIs from file -> '%s'; status -> %s; error -> %s", file_path, response.status_code, response.text, @@ -7941,6 +9635,7 @@ def import_records_management_rsis( def import_physical_objects_settings(self, file_path: str) -> bool: """Import Physical Objects settings from a config file that is uploaded from the 
python pod + Args: file_path (str): path + filename of config file in Python container filesystem Returns: @@ -7953,8 +9648,8 @@ def import_physical_objects_settings(self, file_path: str) -> bool: self.cookie() ) # for some reason we have to omit the other header parts here - otherwise we get a 400 response - logger.info( - "Importing Physical Objects Settings from server file -> %s; calling -> %s", + logger.debug( + "Importing Physical Objects Settings from server file -> '%s'; calling -> %s", file_path, request_url, ) @@ -7962,7 +9657,7 @@ def import_physical_objects_settings(self, file_path: str) -> bool: filename = os.path.basename(file_path) if not os.path.exists(file_path): logger.error( - "The file -> %s does not exist in path -> %s!", + "The file -> '%s' does not exist in path -> '%s'!", filename, os.path.dirname(file_path), ) @@ -7984,12 +9679,12 @@ def import_physical_objects_settings(self, file_path: str) -> bool: return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to import Physical Objects settings from file -> %s; status -> %s; error -> %s", + "Failed to import Physical Objects settings from file -> '%s'; status -> %s; error -> %s", file_path, response.status_code, response.text, @@ -8002,6 +9697,7 @@ def import_physical_objects_codes( self, file_path: str, update_existing_codes: bool = True ) -> bool: """Import Physical Objects codes from a config file that is uploaded from the Python pod + Args: file_path (str): path + filename of config file in Python container filesystem update_existing_codes (bool): whether or not existing codes should be updated (default = True) @@ -8015,8 +9711,8 @@ def import_physical_objects_codes( self.cookie() ) # for some reason we have to omit the other header parts here - otherwise we get a 400 response - logger.info( - "Importing Physical Objects Codes from file -> %s; calling -> %s", + logger.debug( + "Importing Physical Objects Codes from file -> '%s'; calling -> %s", file_path, request_url, ) @@ -8026,7 +9722,7 @@ def import_physical_objects_codes( filename = os.path.basename(file_path) if not os.path.exists(file_path): logger.error( - "The file -> %s does not exist in path -> %s!", + "The file -> '%s' does not exist in path -> '%s'!", filename, os.path.dirname(file_path), ) @@ -8049,12 +9745,12 @@ def import_physical_objects_codes( return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to import Physical Objects Codes from file -> %s; status -> %s; error -> %s", + "Failed to import Physical Objects Codes from file -> '%s'; status -> %s; error -> %s", file_path, response.status_code, response.text, @@ -8065,6 +9761,7 @@ def import_physical_objects_codes( def import_physical_objects_locators(self, file_path: str) -> bool: """Import Physical Objects locators from a config file that is uploaded from the python pod + Args: file_path (str): path + filename of config file in Python container filesystem Returns: @@ -8077,8 +9774,8 @@ def 
import_physical_objects_locators(self, file_path: str) -> bool: self.cookie() ) # for some reason we have to omit the other header parts here - otherwise we get a 400 response - logger.info( - "Importing Physical Objects Locators from file -> %s; calling -> %s", + logger.debug( + "Importing Physical Objects Locators from file -> '%s'; calling -> %s", file_path, request_url, ) @@ -8086,7 +9783,7 @@ def import_physical_objects_locators(self, file_path: str) -> bool: filename = os.path.basename(file_path) if not os.path.exists(file_path): logger.error( - "The file -> %s does not exist in path -> %s!", + "The file -> '%s' does not exist in path -> '%s'!", filename, os.path.dirname(file_path), ) @@ -8108,12 +9805,12 @@ def import_physical_objects_locators(self, file_path: str) -> bool: return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to import Physical Objects Locators from file -> %s; status -> %s; error -> %s", + "Failed to import Physical Objects Locators from file -> '%s'; status -> %s; error -> %s", file_path, response.status_code, response.text, @@ -8126,6 +9823,7 @@ def import_security_clearance_codes( self, file_path: str, include_users: bool = False ) -> bool: """Import Security Clearance codes from a config file that is uploaded from the python pod + Args: file_path (str): path + filename of config file in Python container filesystem include_users (bool): defines if users should be included or not @@ -8139,8 +9837,8 @@ def import_security_clearance_codes( self.cookie() ) # for some reason we have to omit the other header parts here - otherwise we get a 400 response - logger.info( - "Importing Security Clearance Codes from file -> %s; calling -> %s", + logger.debug( + "Importing Security Clearance Codes from file -> '%s'; calling -> %s", file_path, request_url, ) @@ -8150,7 +9848,7 @@ def import_security_clearance_codes( filename = os.path.basename(file_path) if not os.path.exists(file_path): logger.error( - "The file -> %s does not exist in path -> %s!", + "The file -> '%s' does not exist in path -> '%s'!", filename, os.path.dirname(file_path), ) @@ -8173,12 +9871,12 @@ def import_security_clearance_codes( return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: logger.error( - "Failed to import Security Clearance Codes from file -> %s; status -> %s; error -> %s", + "Failed to import Security Clearance Codes from file -> '%s'; status -> %s; error -> %s", file_path, response.status_code, response.text, @@ -8208,7 +9906,7 @@ def assign_user_security_clearance( ] + "/{}/securityclearancelevel".format(user_id) request_header = self.request_form_header() - logger.info( + logger.debug( "Assign security clearance -> %s to user with ID -> %s; calling -> %s", str(security_clearance), str(user_id), @@ -8228,7 +9926,7 @@ def assign_user_security_clearance( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - 
logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -8264,7 +9962,7 @@ def assign_user_supplemental_markings( ] + "/{}/supplementalmarkings".format(user_id) request_header = self.request_form_header() - logger.info( + logger.debug( "Assign supplemental markings -> %s to user with ID -> %s; calling -> %s", str(supplemental_markings), str(user_id), @@ -8284,7 +9982,7 @@ def assign_user_supplemental_markings( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -8321,12 +10019,12 @@ def check_workspace_aviator( if result_data and "data" in result_data: data = result_data["data"] if "disableai" in data: - logger.info( + logger.debug( "Aviator is enabled for workspace with ID -> %s", str(workspace_id) ) return True elif "enableai" in data: - logger.info( + logger.debug( "Aviator is disabled for workspace with ID -> %s", str(workspace_id) ) @@ -8356,13 +10054,13 @@ def update_workspace_aviator( request_header = self.request_form_header() if status is True: - logger.info( + logger.debug( "Enable Content Aviator for workspace with ID -> %s; calling -> %s", str(workspace_id), request_url, ) else: - logger.info( + logger.debug( "Disable Content Aviator for workspace with ID -> %s; calling -> %s", str(workspace_id), request_url, @@ -8381,7 +10079,7 @@ def update_workspace_aviator( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -8438,8 +10136,8 @@ def volume_translator( names_multilingual[language] = translator.translate( "en", language, names_multilingual["en"] ) - logger.info( - "Translate name of node -> %s from -> %s (%s) to -> %s (%s)", + logger.debug( + "Translate name of node -> %s from -> '%s' (%s) to -> '%s' (%s)", current_node_id, name, "en", @@ -8454,8 +10152,8 @@ def volume_translator( descriptions_multilingual[language] = translator.translate( "en", language, descriptions_multilingual["en"] ) - logger.info( - "Translate description of node -> %s from -> %s (%s) to -> %s (%s)", + logger.debug( + "Translate description of node -> %s from -> '%s' (%s) to -> '%s' (%s)", current_node_id, name, "en", @@ -8483,3 +10181,386 @@ def volume_translator( ) # end method definition + + def download_document_multi_threading(self, node_id: int, file_path: str): + """Multi-threading variant of download_document() + + Args: + node_id (int): Node ID of the document to download + file_path (str): file system path - location to download to + """ + + # Aquire and Release Thread limit to limit parallel executions + + with self._semaphore: + self.download_document(node_id=node_id, file_path=file_path) + + # end method definition + + def apply_filter( + self, + node: dict, + current_depth: int = 0, + filter_depth: int | None = None, + filter_subtypes: list | None = None, + filter_category: str | None = None, + filter_attributes: dict | list | None = None, + ) -> bool: + 
"""Check all defined filters for the given node. + + Args: + node (dict): Current OTCS Node + filter_depth (int | None, optional): Additive filter criterium for path depth. Defaults to None = filter not active. + filter_subtype (list | None, optional): Additive filter criterium for workspace type. Defaults to None = filter not active. + filter_category (str | None, optional): Additive filter criterium for workspace category. Defaults to None = filter not active. + filter_attributes (dict | list | None, optional): _description_. Defaults to None. + + Returns: + bool: Only for nodes that comply with ALL provided filters True is returned. Otherwise False. + """ + + if not node or not "type" in node or not "id" in node: + return False + + if filter_subtypes and not node["type"] in filter_subtypes: + return False + + if filter_depth is not None and filter_depth != current_depth: + return False + + if filter_category: + category_names = self.get_node_category_names(node_id=node["id"]) + if not category_names or filter_category not in category_names: + return False + if filter_attributes: + if isinstance(filter_attributes, dict): + filter_attributes = [filter_attributes] + for filter_attribute in filter_attributes: + filter_category_name = filter_attribute.get( + "category", filter_category + ) + if not filter_category_name: + continue + filter_set_name = filter_attribute.get("set", None) + filter_attribute_name = filter_attribute.get("attribute", None) + if not filter_attribute_name: + continue + filter_row = filter_attribute.get("row", None) + filter_value = filter_attribute.get("value", None) + actual_value = self.get_category_value_by_name( + node_id=node["id"], + category_name=filter_category_name, + set_name=filter_set_name, + attribute_name=filter_attribute_name, + set_row=filter_row, + ) + # Both actual value and filter value can be strings or list of strings. 
+ # So we need to handle a couple of cases here: + + # Case 1: Data source delivers a list and filter value is a scalar value (int, str, float) + if isinstance(actual_value, list) and isinstance( + filter_value, (str, int, float) + ): + if filter_value not in actual_value: + return False + # Case 2: Data source delivers a scalar value and filter value is a list + elif isinstance(actual_value, (str, int, float)) and isinstance( + filter_value, list + ): + if actual_value not in filter_value: + return False + # Case 3: Both filter and actual value are lists: + elif isinstance(actual_value, list) and isinstance( + filter_value, list + ): + # Check if there's a non-empty intersection of both lists: + if not (set(actual_value) & set(filter_value)): + return False + elif isinstance(actual_value, (str, int, float)) and isinstance( + filter_value, (str, int, float) + ): + if actual_value != filter_value: + return False + else: + return False + + return True + + # end method definition + + def load_items( + self, + node_id: int, + folder_path: list | None = None, + current_depth: int = 0, + workspace_type: int | None = None, + workspace_id: str | None = None, + workspace_name: str | None = None, + workspace_description: str | None = None, + filter_workspace_depth: int | None = None, + filter_workspace_subtypes: list | None = None, + filter_workspace_category: str | None = None, + filter_workspace_attributes: dict | list | None = None, + filter_item_depth: int | None = None, + filter_item_category: str | None = None, + filter_item_attributes: dict | list | None = None, + workspace_metadata: bool = True, + item_metadata: bool = True, + skip_existing_downloads: bool = True, + ) -> bool: + """Create a Pandas Data Frame by traversing a given Content Server hierarchy and collecting workspace and document items. + + Args: + node_id (int): current Node ID (in recursive processing). Initially this is the starting node (root of the traversal) + folder_path (list | None, optional): The current path from the starting node to the current node. Defaults to None. + current_depth (int): The current depth in the tree that is traversed. + workspace_type (int | None, optional): Type of the workspace (if already found in the hierarchy). Defaults to None. + workspace_id (str | None, optional): ID of the workspace (if already found in the hierarchy). Defaults to None. + workspace_name (str | None, optional): Name of the workspace (if already found in the hierarchy). Defaults to None. + workspace_description (str | None, optional): Description of the workspace (if already found in the hierarchy). Defaults to None. + filter_workspace_depth (int | None, optional): Additive filter criterion for path depth. Defaults to None = filter not active. + filter_workspace_subtypes (list | None, optional): Additive filter criterion for workspace type. Defaults to None = filter not active. + filter_workspace_category (str | None, optional): Additive filter criterion for workspace category. Defaults to None = filter not active.
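+ filter_workspace_attributes (dict | list | None, optional): Additive filter criterion for workspace category attribute values. Defaults to None = filter not active.
+ filter_item_depth (int | None, optional): Additive filter criterion for item path depth. Defaults to None = filter not active.
+ filter_item_category (str | None, optional): Additive filter criterion for item category. Defaults to None = filter not active.
+ filter_item_attributes (dict | list | None, optional): Additive filter criterion for item category attribute values. Defaults to None = filter not active.
+ workspace_metadata (bool, optional): Whether category attributes of found workspaces should be added to the Data Frame. Defaults to True.
+ item_metadata (bool, optional): Whether category attributes of found items should be added to the Data Frame. Defaults to True.
+ skip_existing_downloads (bool, optional): Whether documents that already exist in the download directory should not be downloaded again. Defaults to True.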
+ + Returns: + bool: True = success, False = error + """ + + if folder_path is None: + folder_path = [] # required for list concatenation below + + # Create folder if it does not exist + try: + os.makedirs(self._download_dir) + except FileExistsError: + pass + + # Acquire and release the threading semaphore to limit parallel executions + # and not overload the source Content Server system: + with self._semaphore: + subnodes = self.get_subnodes(parent_node_id=node_id) + + # Initialize traversal threads: + traversal_threads = [] + + for subnode in subnodes.get("results", []): + subnode = subnode.get("data").get("properties") + + # Initialize download threads for this subnode: + download_threads = [] + + match subnode["type"]: + + case 0 | 848: # folder or workspace + # First we check if we have found a workspace already: + if not workspace_id: + # Second we apply the defined filters to the current node. Only "workspaces" + # that comply with ALL provided filters are considered and written into the data frame + found_workspace = self.apply_filter( + node=subnode, + current_depth=current_depth, + filter_depth=filter_workspace_depth, + filter_subtypes=filter_workspace_subtypes, + filter_category=filter_workspace_category, + filter_attributes=filter_workspace_attributes, + ) + else: + # Otherwise the current node cannot be a workspace. + # For future improvements we could look at supporting sub-workspaces: + found_workspace = False + + if found_workspace: + logger.info( + "Found workspace -> '%s' (%s) in depth -> %s. Adding to Data Frame...", + subnode["name"], + subnode["id"], + current_depth, + ) + workspace_type = subnode["type"] + workspace_id = subnode["id"] + workspace_name = subnode["name"] + workspace_description = subnode["description"] + row = {} + row["workspace_type"] = workspace_type + row["workspace_id"] = workspace_id + row["workspace_name"] = workspace_name + row["workspace_description"] = workspace_description + row["workspace_outer_path"] = folder_path + if workspace_metadata: + categories = self.get_node_categories( + workspace_id, metadata=False + ) + if categories and categories["results"]: + for category in categories["results"]: + if ( + "data" not in category + or "categories" not in category["data"] + ): + continue + attributes = category["data"]["categories"] + for key in attributes: + value = attributes[key] + row["workspace_" + key] = value + + # Now we add the row to the Pandas Data Frame in the Data class: + with self._data.lock(): + self._data.append(row) + subfolder = [] # now we switch to the workspace inner path + else: # we treat the current folder / workspace just as a container + logger.info( + "Node -> '%s' (%s) in depth -> %s is NOT a workspace. Keep traversing...", + subnode["name"], + subnode["id"], + current_depth, + ) + subfolder = folder_path + [subnode["name"]] + + # Recursive call to start threads for sub-items: + thread = threading.Thread( + target=self.load_items, + args=( + subnode["id"], + subfolder, + current_depth + 1, + workspace_type, + workspace_id, + workspace_name, + workspace_description, + filter_workspace_depth, + filter_workspace_subtypes, + filter_workspace_category, + filter_workspace_attributes, + filter_item_depth, + filter_item_category, + filter_item_attributes, + workspace_metadata, + item_metadata, + skip_existing_downloads, + ), + name="traverse_node_{}".format(subnode["id"]), + ) + thread.start() + traversal_threads.append(thread) + + case 1: # shortcuts + pass + + case 854: # Related Workspaces - we don't want to run into loops!
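+ # Following a related workspace could revisit nodes that are already traversed elsewhere in the hierarchy.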
+ pass + + case 751: # E-Mail folders + pass + + case 123469: # Forum + pass + + case 144: # document + # We apply the defined filters to the current node. Only "documents" + # that comply with ALL provided filters are considered and written into the data frame + found_document = self.apply_filter( + node=subnode, + current_depth=current_depth, + filter_depth=filter_item_depth, + filter_category=filter_item_category, + filter_attributes=filter_item_attributes, + ) + + if not found_document: + continue + + # We use the node ID as the filename to avoid any + # issues with too long or invalid file names. + # As the Pandas DataFrame has all information + # this is easy to resolve at upload time. + file_path = "{}/{}".format(self._download_dir, subnode["id"]) + + # We only consider documents that are inside the defined "workspaces": + if workspace_id: + logger.info( + "Found document -> '%s' (%s) in depth -> %s inside workspace -> '%s' (%s). Adding to Data Frame...", + subnode["name"], + subnode["id"], + current_depth, + workspace_name, + workspace_id, + ) + else: + logger.warning( + "Found document -> '%s' (%s) in depth -> %s outside of workspace. Ignoring...", + subnode["name"], + subnode["id"], + current_depth, + ) + # Skip documents outside of workspaces (as the log message states): + continue + + # We download only if not downloaded before or if downloaded + # before but forced to re-download: + if not os.path.exists(file_path) or not skip_existing_downloads: + # + # Start an asynchronous download thread: + # + thread = threading.Thread( + target=self.download_document_multi_threading, + args=(subnode["id"], file_path), + name="download_document_node_{}".format(subnode["id"]), + ) + thread.start() + download_threads.append(thread) + else: + logger.info( + "File -> '%s' has been downloaded before. Skipping download...", + file_path, + ) + + row = {} + row["workspace_type"] = workspace_type + row["workspace_id"] = workspace_id + row["workspace_name"] = workspace_name + row["workspace_description"] = workspace_description + row["item_id"] = str(subnode["id"]) + row["item_name"] = subnode["name"] + row["item_description"] = subnode["description"] + row["item_path"] = folder_path + row["item_download_name"] = str(subnode["id"]) + if item_metadata: + categories = self.get_node_categories( + subnode["id"], metadata=False + ) + if categories and categories["results"]: + for category in categories["results"]: + if ( + "data" not in category + or "categories" not in category["data"] + ): + continue + attributes = category["data"]["categories"] + for key in attributes: + value = attributes[key] + row["item_" + key] = value + + # Now we add the row to the Pandas Data Frame in the Data class: + with self._data.lock(): + self._data.append(row) + case 140: # url + logger.info( + "Found URL object -> %s with URL -> %s", subnode["id"], subnode["url"] + ) + + case _: + logger.warning( + "Don't know what to do with item -> '%s' (%s) of type -> %s", + subnode["name"], + subnode["id"], + subnode["type"], + ) + + # Wait for all download threads to complete: + for thread in download_threads: + thread.join() + + # Wait for all traversal threads to complete: + for thread in traversal_threads: + thread.join() + + return True + + # end method definition diff --git a/pyxecm/otds.py b/pyxecm/otds.py index b5cf137..3a3828b 100644 --- a/pyxecm/otds.py +++ b/pyxecm/otds.py @@ -213,6 +213,8 @@ def config(self) -> dict: """ return self._config + # end method definition + def cookie(self) -> dict: """Returns the login cookie of OTDS.
This is set by the authenticate() method @@ -222,6 +224,8 @@ def cookie(self) -> dict: """ return self._cookie + # end method definition + def credentials(self) -> dict: """Returns the credentials (username + password) @@ -233,6 +237,8 @@ def credentials(self) -> dict: "password": self.config()["password"], } + # end method definition + def base_url(self) -> str: """Returns the base URL of OTDS @@ -241,6 +247,8 @@ def base_url(self) -> str: """ return self.config()["baseUrl"] + # end method definition + def rest_url(self) -> str: """Returns the REST URL of OTDS @@ -249,6 +257,8 @@ def rest_url(self) -> str: """ return self.config()["restUrl"] + # end method definition + def credential_url(self) -> str: """Returns the Credentials URL of OTDS @@ -257,6 +267,8 @@ def credential_url(self) -> str: """ return self.config()["credentialUrl"] + # end method definition + def auth_handler_url(self) -> str: """Returns the Auth Handler URL of OTDS @@ -265,6 +277,8 @@ def auth_handler_url(self) -> str: """ return self.config()["authHandlerUrl"] + # end method definition + def partition_url(self) -> str: """Returns the Partition URL of OTDS @@ -273,6 +287,8 @@ def partition_url(self) -> str: """ return self.config()["partitionUrl"] + # end method definition + def access_role_url(self) -> str: """Returns the Access Role URL of OTDS @@ -281,6 +297,8 @@ def access_role_url(self) -> str: """ return self.config()["accessRoleUrl"] + # end method definition + def oauth_client_url(self) -> str: """Returns the OAuth Client URL of OTDS @@ -289,6 +307,8 @@ def oauth_client_url(self) -> str: """ return self.config()["oauthClientUrl"] + # end method definition + def resource_url(self) -> str: """Returns the Resource URL of OTDS @@ -297,6 +317,8 @@ def resource_url(self) -> str: """ return self.config()["resourceUrl"] + # end method definition + def license_url(self) -> str: """Returns the License URL of OTDS @@ -305,6 +327,8 @@ def license_url(self) -> str: """ return self.config()["licenseUrl"] + # end method definition + def token_url(self) -> str: """Returns the Token URL of OTDS @@ -313,6 +337,8 @@ def token_url(self) -> str: """ return self.config()["tokenUrl"] + # end method definition + def users_url(self) -> str: """Returns the Users URL of OTDS @@ -321,6 +347,8 @@ def users_url(self) -> str: """ return self.config()["usersUrl"] + # end method definition + def groups_url(self) -> str: """Returns the Groups URL of OTDS @@ -329,6 +357,8 @@ def groups_url(self) -> str: """ return self.config()["groupsUrl"] + # end method definition + def system_config_url(self) -> str: """Returns the System Config URL of OTDS @@ -337,6 +367,8 @@ def system_config_url(self) -> str: """ return self.config()["systemConfigUrl"] + # end method definition + def consolidation_url(self) -> str: """Returns the Consolidation URL of OTDS @@ -345,6 +377,8 @@ def consolidation_url(self) -> str: """ return self.config()["consolidationUrl"] + # end method definition + def parse_request_response( self, response_object: object, @@ -396,7 +430,7 @@ def authenticate(self, revalidate: bool = False) -> dict | None: # Already authenticated and session still valid? 
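+ # Reuse the cached cookie unless the caller forces re-authentication (revalidate=True), e.g. after a 401 response.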
if self._cookie and not revalidate: - logger.info( + logger.debug( "Session still valid - return existing cookie -> %s", str(self._cookie), ) @@ -404,7 +438,7 @@ def authenticate(self, revalidate: bool = False) -> dict | None: otds_ticket = "NotSet" - logger.info("Requesting OTDS ticket from -> %s", self.credential_url()) + logger.debug("Requesting OTDS ticket from -> %s", self.credential_url()) response = None try: @@ -429,7 +463,7 @@ def authenticate(self, revalidate: bool = False) -> dict | None: return None else: otds_ticket = authenticate_dict["ticket"] - logger.info("Ticket -> %s", otds_ticket) + logger.debug("Ticket -> %s", otds_ticket) else: logger.error("Failed to request an OTDS ticket; error -> %s", response.text) return None @@ -462,7 +496,7 @@ def add_license_to_resource( dict: Request response (dictionary) or None if the REST call fails """ - logger.info("Reading license file -> %s...", path_to_license_file) + logger.debug("Reading license file -> %s...", path_to_license_file) try: with open(path_to_license_file, "rt", encoding="UTF-8") as license_file: license_content = license_file.read() @@ -491,14 +525,14 @@ def add_license_to_resource( if existing_license: request_url += "/" + existing_license[0]["id"] else: - logger.info( + logger.debug( "No existing license for resource -> %s found - adding a new license...", resource_id, ) # change strategy to create a new license: update = False - logger.info( + logger.debug( "Adding product license -> %s for product -> %s to resource -> %s; calling -> %s", path_to_license_file, product_description, @@ -530,7 +564,7 @@ def add_license_to_resource( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -574,7 +608,7 @@ def get_license_for_resource(self, resource_id: str): + "&validOnly=false" ) - logger.info( + logger.debug( "Get license for resource -> %s; calling -> %s", resource_id, request_url ) @@ -593,7 +627,7 @@ def get_license_for_resource(self, resource_id: str): return response_dict["licenseObjects"]["_licenses"] # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -619,7 +653,7 @@ def delete_license_from_resource(self, resource_id: str, license_id: str) -> boo request_url = "{}/{}".format(self.license_url(), license_id) - logger.info( + logger.debug( "Deleting product license -> %s from resource -> %s; calling -> %s", license_id, resource_id, @@ -638,7 +672,7 @@ def delete_license_from_resource(self, resource_id: str, license_id: str) -> boo return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -707,7 +741,7 @@ def assign_user_to_license( request_url = self.license_url() + "/object/" + license_location - logger.info( + logger.debug( "Assign license feature -> %s of license -> %s associated with resource 
-> %s to user -> %s; calling -> %s", license_feature, license_location, @@ -726,7 +760,7 @@ def assign_user_to_license( timeout=None, ) if response.ok: - logger.info( + logger.debug( "Added license feature -> %s for user -> %s", license_feature, user_id, @@ -734,7 +768,7 @@ def assign_user_to_license( return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -812,7 +846,7 @@ def assign_partition_to_license( request_url = self.license_url() + "/object/" + license_location - logger.info( + logger.debug( "Assign license feature -> %s of license -> %s associated with resource -> %s to partition -> %s; calling -> %s", license_feature, license_location, @@ -831,7 +865,7 @@ def assign_partition_to_license( timeout=None, ) if response.ok: - logger.info( + logger.debug( "Added license feature -> %s for partition -> %s", license_feature, partition_name, @@ -839,7 +873,7 @@ def assign_partition_to_license( return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -925,7 +959,7 @@ def get_licensed_objects( + license_feature ) - logger.info( + logger.debug( "Get licensed objects for license -> %s and license feature -> %s associated with resource -> %s; calling -> %s", license_name, license_feature, @@ -945,7 +979,7 @@ def get_licensed_objects( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1102,7 +1136,7 @@ def add_partition(self, name: str, description: str) -> dict | None: request_url = self.partition_url() - logger.info( + logger.debug( "Adding user partition -> %s (%s); calling -> %s", name, description, @@ -1122,7 +1156,7 @@ def add_partition(self, name: str, description: str) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1149,7 +1183,7 @@ def get_partition(self, name: str, show_error: bool = True) -> dict | None: request_url = "{}/{}".format(self.config()["partitionUrl"], name) - logger.info("Getting user partition -> %s; calling -> %s", name, request_url) + logger.debug("Getting user partition -> %s; calling -> %s", name, request_url) retries = 0 while True: @@ -1163,7 +1197,7 @@ def get_partition(self, name: str, show_error: bool = True) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to 
re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1213,7 +1247,7 @@ def add_user( request_url = self.users_url() - logger.info( + logger.debug( "Adding user -> %s to partition -> %s; calling -> %s", name, partition, @@ -1234,7 +1268,7 @@ def add_user( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1260,7 +1294,7 @@ def get_user(self, partition: str, user_id: str) -> dict | None: request_url = self.users_url() + "/" + user_id + "@" + partition - logger.info( + logger.debug( "Get user -> %s in partition -> %s; calling -> %s", user_id, partition, @@ -1279,7 +1313,7 @@ def get_user(self, partition: str, user_id: str) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1317,14 +1351,14 @@ def get_users(self, partition: str = "", limit: int | None = None) -> dict | Non request_url += "?{}".format(encodedQuery) if partition: - logger.info( + logger.debug( "Get all users in partition -> %s (limit -> %s); calling -> %s", partition, limit, request_url, ) else: - logger.info( + logger.debug( "Get all users (limit -> %s); calling -> %s", limit, request_url, @@ -1342,7 +1376,7 @@ def get_users(self, partition: str = "", limit: int | None = None) -> dict | Non return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1390,7 +1424,7 @@ def update_user( request_url = self.users_url() + "/" + user_id - logger.info( + logger.debug( "Update user -> %s attribute -> %s to value -> %s; calling -> %s", user_id, attribute_name, @@ -1411,9 +1445,12 @@ def update_user( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 + elif response.status_code == 404: + logger.warning("User does not exist -> %s", user_id) + return None else: logger.error( "Failed to update user -> %s; error -> %s (%s)", @@ -1437,7 +1474,7 @@ def delete_user(self, partition: str, user_id: str) -> bool: request_url = self.users_url() + "/" + user_id + "@" + partition - logger.info( + logger.debug( "Delete user -> %s in partition -> %s; calling -> %s", user_id, partition, @@ -1456,7 +1493,7 @@ def delete_user(self, partition: str, user_id: str) -> bool: return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to 
re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1484,7 +1521,7 @@ def reset_user_password(self, user_id: str, password: str) -> bool: request_url = "{}/{}/password".format(self.users_url(), user_id) - logger.info( + logger.debug( "Resetting password for user -> %s; calling -> %s", user_id, request_url ) @@ -1501,7 +1538,7 @@ def reset_user_password(self, user_id: str, password: str) -> bool: return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1534,7 +1571,7 @@ def add_group(self, partition: str, name: str, description: str) -> dict | None: request_url = self.groups_url() - logger.info( + logger.debug( "Adding group -> %s to partition -> %s; calling -> %s", name, partition, @@ -1555,7 +1592,7 @@ def add_group(self, partition: str, name: str, description: str) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1596,7 +1633,7 @@ def get_group(self, group: str) -> dict | None: request_url = self.groups_url() + "/" + group - logger.info("Get group -> %s; calling -> %s", group, request_url) + logger.debug("Get group -> %s; calling -> %s", group, request_url) retries = 0 while True: @@ -1610,7 +1647,7 @@ def get_group(self, group: str) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1638,7 +1675,7 @@ def add_user_to_group(self, user: str, group: str) -> bool: request_url = self.users_url() + "/" + user + "/memberof" - logger.info( + logger.debug( "Adding user -> %s to group -> %s; calling -> %s", user, group, request_url ) @@ -1655,7 +1692,7 @@ def add_user_to_group(self, user: str, group: str) -> bool: return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1684,7 +1721,7 @@ def add_group_to_parent_group(self, group: str, parent_group: str) -> bool: request_url = self.groups_url() + "/" + group + "/memberof" - logger.info( + logger.debug( "Adding group -> %s to parent group -> %s; calling -> %s", group, parent_group, @@ -1705,7 +1742,7 @@ def add_group_to_parent_group(self, group: str, parent_group: str) -> bool: return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1751,7 +1788,7 @@ def add_resource( request_url = 
self.config()["resourceUrl"] - logger.info( + logger.debug( "Adding resource -> %s (%s); calling -> %s", name, description, request_url ) @@ -1768,7 +1805,7 @@ def add_resource( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1794,7 +1831,7 @@ def get_resource(self, name: str, show_error: bool = False) -> dict | None: request_url = "{}/{}".format(self.config()["resourceUrl"], name) - logger.info("Retrieving resource -> %s; calling -> %s", name, request_url) + logger.debug("Retrieving resource -> %s; calling -> %s", name, request_url) retries = 0 while True: @@ -1808,7 +1845,7 @@ def get_resource(self, name: str, show_error: bool = False) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1821,7 +1858,7 @@ def get_resource(self, name: str, show_error: bool = False) -> dict | None: response.text, ) else: - logger.info("Resource -> %s not found.", name) + logger.debug("Resource -> %s not found.", name) return None # end method definition @@ -1841,7 +1878,7 @@ def update_resource( request_url = "{}/{}".format(self.config()["resourceUrl"], name) - logger.info("Updating resource -> %s; calling -> %s", name, request_url) + logger.debug("Updating resource -> %s; calling -> %s", name, request_url) retries = 0 while True: @@ -1856,7 +1893,7 @@ def update_resource( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1869,7 +1906,7 @@ def update_resource( response.text, ) else: - logger.info("Resource -> %s not found.", name) + logger.debug("Resource -> %s not found.", name) return None # end method definition @@ -1887,7 +1924,7 @@ def activate_resource(self, resource_id: str) -> dict | None: request_url = "{}/{}/activate".format(self.config()["resourceUrl"], resource_id) - logger.info( + logger.debug( "Activating resource -> %s; calling -> %s", resource_id, request_url ) @@ -1904,7 +1941,7 @@ def activate_resource(self, resource_id: str) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1929,7 +1966,7 @@ def get_access_roles(self) -> dict | None: request_url = self.config()["accessRoleUrl"] - logger.info("Retrieving access roles; calling -> %s", request_url) + logger.debug("Retrieving access roles; calling -> %s", request_url) retries = 0 while True: @@ -1943,7 +1980,7 @@ def get_access_roles(self) -> dict | None: return 
self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -1967,7 +2004,7 @@ def get_access_role(self, access_role: str) -> dict | None: request_url = self.config()["accessRoleUrl"] + "/" + access_role - logger.info( + logger.debug( "Retrieving access role -> %s; calling -> %s", access_role, request_url ) @@ -1983,7 +2020,7 @@ def get_access_role(self, access_role: str) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2020,7 +2057,7 @@ def add_partition_to_access_role( self.config()["accessRoleUrl"], access_role ) - logger.info( + logger.debug( "Add user partition -> %s to access role -> %s; calling -> %s", partition, access_role, @@ -2039,7 +2076,7 @@ def add_partition_to_access_role( if response.ok: return True elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2079,14 +2116,14 @@ def add_user_to_access_role( accessRoleUsers = accessRolesGetResponse["accessRoleMembers"]["users"] for user in accessRoleUsers: if user["displayName"] == user_id: - logger.info( + logger.debug( "User -> %s already added to access role -> %s", user_id, access_role, ) return True - logger.info( + logger.debug( "User -> %s is not yet in access role -> %s - adding...", user_id, access_role, @@ -2099,7 +2136,7 @@ def add_user_to_access_role( self.config()["accessRoleUrl"], access_role ) - logger.info( + logger.debug( "Add user -> %s to access role -> %s; calling -> %s", user_id, access_role, @@ -2118,7 +2155,7 @@ def add_user_to_access_role( if response.ok: return True elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2157,12 +2194,12 @@ def add_group_to_access_role( accessRoleGroups = accessRolesGetResponse["accessRoleMembers"]["groups"] for accessRoleGroup in accessRoleGroups: if accessRoleGroup["name"] == group: - logger.info( + logger.debug( "Group -> %s already added to access role -> %s", group, access_role ) return True - logger.info( + logger.debug( "Group -> %s is not yet in access role -> %s - adding...", group, access_role, @@ -2175,7 +2212,7 @@ def add_group_to_access_role( self.config()["accessRoleUrl"], access_role ) - logger.info( + logger.debug( "Add group -> %s to access role -> %s; calling -> %s", group, access_role, @@ -2194,7 +2231,7 @@ def add_group_to_access_role( if response.ok: return True elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2237,7 +2274,7 @@ def update_access_role_attributes( request_url = 
"{}/{}/attributes".format(self.config()["accessRoleUrl"], name) - logger.info( + logger.debug( "Update access role -> %s with attributes -> %s; calling -> %s", name, accessRolePutBodyJson, @@ -2257,7 +2294,7 @@ def update_access_role_attributes( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2293,7 +2330,7 @@ def add_system_attribute( request_url = "{}/system_attributes".format(self.config()["systemConfigUrl"]) if description: - logger.info( + logger.debug( "Add system attribute -> %s (%s) with value -> %s; calling -> %s", name, description, @@ -2301,7 +2338,7 @@ def add_system_attribute( request_url, ) else: - logger.info( + logger.debug( "Add system attribute -> %s with value -> %s; calling -> %s", name, value, @@ -2321,7 +2358,7 @@ def add_system_attribute( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2347,7 +2384,7 @@ def get_trusted_sites(self) -> dict | None: request_url = "{}/whitelist".format(self.config()["systemConfigUrl"]) - logger.info("Retrieving trusted sites; calling -> %s", request_url) + logger.debug("Retrieving trusted sites; calling -> %s", request_url) retries = 0 while True: @@ -2361,7 +2398,7 @@ def get_trusted_sites(self) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2396,7 +2433,7 @@ def add_trusted_site(self, trusted_site: str) -> dict | None: request_url = "{}/whitelist".format(self.config()["systemConfigUrl"]) - logger.info("Add trusted site -> %s; calling -> %s", trusted_site, request_url) + logger.debug("Add trusted site -> %s; calling -> %s", trusted_site, request_url) response = requests.put( url=request_url, @@ -2484,7 +2521,7 @@ def enable_audit(self): request_url = "{}/audit".format(self.config()["systemConfigUrl"]) - logger.info("Enable audit; calling -> %s", request_url) + logger.debug("Enable audit; calling -> %s", request_url) response = requests.put( url=request_url, @@ -2605,7 +2642,7 @@ def add_oauth_client( request_url = self.oauth_client_url() - logger.info( + logger.debug( "Adding oauth client -> %s (%s); calling -> %s", description, client_id, @@ -2625,7 +2662,7 @@ def add_oauth_client( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2651,7 +2688,7 @@ def get_oauth_client(self, client_id: str, show_error: bool = True) -> dict | No request_url = "{}/{}".format(self.oauth_client_url(), client_id) - 
logger.info("Get oauth client -> %s; calling -> %s", client_id, request_url) + logger.debug("Get oauth client -> %s; calling -> %s", client_id, request_url) retries = 0 while True: @@ -2665,7 +2702,7 @@ def get_oauth_client(self, client_id: str, show_error: bool = True) -> dict | No return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2696,7 +2733,7 @@ def update_oauth_client(self, client_id: str, updates: dict) -> dict | None: request_url = "{}/{}".format(self.oauth_client_url(), client_id) - logger.info( + logger.debug( "Update OAuth client -> %s with -> %s; calling -> %s", client_id, updates, @@ -2716,7 +2753,7 @@ def update_oauth_client(self, client_id: str, updates: dict) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2741,7 +2778,7 @@ def add_oauth_clients_to_access_role(self, access_role_name: str): request_url = self.config()["accessRoleUrl"] + "/" + access_role_name - logger.info( + logger.debug( "Get access role -> %s; calling -> %s", access_role_name, request_url ) @@ -2758,7 +2795,7 @@ def add_oauth_clients_to_access_role(self, access_role_name: str): break # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -2823,7 +2860,7 @@ def add_oauth_clients_to_access_role(self, access_role_name: str): ) if response.ok: - logger.info( + logger.debug( "OauthClients partition successfully added to access role -> %s", access_role_name, ) @@ -2925,7 +2962,7 @@ def get_auth_handler(self, name: str, show_error: bool = True) -> dict | None: request_url = "{}/{}".format(self.auth_handler_url(), name) - logger.info( + logger.debug( "Getting authentication handler -> %s; calling -> %s", name, request_url ) @@ -2941,7 +2978,7 @@ def get_auth_handler(self, name: str, show_error: bool = True) -> dict | None: return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -3070,12 +3107,214 @@ def add_auth_handler_saml( "_description": "Specifies the SAML binding to use for the response to an AuthnRequest", "_value": "urn:oasis:names:tc:SAML:2.0:bindings:HTTP-POST", }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim1", + "_name": "Claim 1", + "_description": "SAML attribute/claim that should be mapped to an OTDS user attribute. This value is case sensitive. Note that mapped claims are only relevant if the corresponding account is auto-provisioned in OTDS. 
See the Administration Guide for details.", + "_value": "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/emailaddress", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute1", + "_name": "OTDS Attribute 1", + "_description": "OTDS user attribute to which the SAML attribute/claim should be mapped", + "_value": "mail", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim2", + "_name": "Claim 2", + "_value": "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/givenname", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute2", + "_name": "OTDS Attribute 2", + "_value": "givenName", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim3", + "_name": "Claim 3", + "_value": "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/surname", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute3", + "_name": "OTDS Attribute 3", + "_value": "sn", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim4", + "_name": "Claim 4", + "_value": "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/name", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute4", + "_name": "OTDS Attribute 4", + "_value": "displayName", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim5", + "_name": "Claim 5", + "_value": "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/streetaddress", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute5", + "_name": "OTDS Attribute 5", + "_value": "oTStreetAddress", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim6", + "_name": "Claim 6", + "_value": "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/locality", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute6", + "_name": "OTDS Attribute 6", + "_value": "l", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim7", + "_name": "Claim 7", + "_value": "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/stateorprovince", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute7", + "_name": "OTDS Attribute 7", + "_value": "st", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim8", + "_name": "Claim 8", + "_value": "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/postalcode", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute8", + "_name": "OTDS Attribute 8", + "_value": "postalCode", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim9", + "_name": "Claim 9", + "_value": "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/country", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute9", + "_name": "OTDS Attribute 9", + "_value": "countryName", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim10", + "_name": "Claim 10", + "_value": "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/otherphone", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute10", + "_name": "OTDS Attribute 10", + "_value": "oTTelephoneNumber", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim11", + "_name": "Claim 11", + "_value": "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/homephone", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute11", + "_name": "OTDS Attribute 11", + "_value": "homePhone", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim12", + "_name": "Claim 12", + "_value": "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/dateofbirth", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute12", + "_name": "OTDS Attribute 12", + "_value": 
"birthDate", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim13", + "_name": "Claim 13", + "_value": "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/gender", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute13", + "_name": "OTDS Attribute 13", + "_value": "gender", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim14", + "_name": "Claim 14", + "_value": "", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute14", + "_name": "OTDS Attribute 14", + "_value": "", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim15", + "_name": "Claim 15", + "_value": "http://schemas.xmlsoap.org/claims/Group", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute15", + "_name": "OTDS Attribute 15", + "_value": "oTMemberOf", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim16", + "_name": "Claim 16", + "_value": "http://schemas.xmlsoap.org/claims/Department", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute16", + "_name": "OTDS Attribute 16", + "_value": "oTDepartment", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim17", + "_name": "Claim 17", + "_value": "http://schemas.xmlsoap.org/claims/Title", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute17", + "_name": "OTDS Attribute 17", + "_value": "title", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim18", + "_name": "Claim 18", + "_value": "", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute18", + "_name": "OTDS Attribute 18", + "_value": "", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim19", + "_name": "Claim 19", + "_value": "http://schemas.microsoft.com/ws/2008/06/identity/claims/role", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute19", + "_name": "OTDS Attribute 19", + "_value": "oTMemberOf", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claim20", + "_name": "Claim 20", + "_value": "", + }, + { + "_key": "com.opentext.otds.as.drivers.saml.claimAttribute20", + "_name": "OTDS Attribute 20", + "_value": "", + }, ], } request_url = self.auth_handler_url() - logger.info( + logger.debug( "Adding SAML auth handler -> %s (%s); calling -> %s", name, description, @@ -3095,7 +3334,7 @@ def add_auth_handler_saml( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -3196,7 +3435,7 @@ def add_auth_handler_sap( # 2. Create the auth handler in OTDS request_url = self.auth_handler_url() - logger.info( + logger.debug( "Adding SAP auth handler -> %s (%s); calling -> %s", name, description, @@ -3222,7 +3461,7 @@ def add_auth_handler_sap( # 3. 
Upload the certificate file: # Check that the certificate (PSE) file is readable: - logger.info("Reading certificate file -> %s...", certificate_file) + logger.debug("Reading certificate file -> %s...", certificate_file) try: # PSE files are binary - so we need to open with "rb": with open(certificate_file, "rb") as certFile: @@ -3246,21 +3485,21 @@ def add_auth_handler_sap( certContentDecoded = base64.b64decode(certContent, validate=True) certContentEncoded = base64.b64encode(certContentDecoded).decode("utf-8") if certContentEncoded == certContent.decode("utf-8"): - logger.info( + logger.debug( "Certificate file -> %s is base64 encoded", certificate_file ) cert_file_encoded = True else: cert_file_encoded = False except TypeError: - logger.info( + logger.debug( "Certificate file -> %s is not base64 encoded", certificate_file ) cert_file_encoded = False if cert_file_encoded: certificate_file = "/tmp/" + os.path.basename(certificate_file) - logger.info("Writing decoded certificate file -> %s...", certificate_file) + logger.debug("Writing decoded certificate file -> %s...", certificate_file) try: # PSE files need to be binary - so we need to open with "wb": with open(certificate_file, "wb") as certFile: @@ -3289,7 +3528,7 @@ def add_auth_handler_sap( request_url = self.auth_handler_url() + "/" + name + "/files" - logger.info( + logger.debug( "Uploading certificate file -> %s for SAP auth handler -> %s (%s); calling -> %s", certificate_file, name, @@ -3705,7 +3944,7 @@ def add_auth_handler_oauth( request_url = self.auth_handler_url() - logger.info( + logger.debug( "Adding OAuth auth handler -> %s (%s); calling -> %s", name, description, @@ -3725,7 +3964,7 @@ def add_auth_handler_oauth( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -3767,7 +4006,7 @@ def consolidate(self, resource_name: str) -> bool: request_url = "{}".format(self.consolidation_url()) - logger.info( + logger.debug( "Consolidation of resource -> %s; calling -> %s", resource_dn, request_url ) @@ -3784,7 +4023,7 @@ def consolidate(self, resource_name: str) -> bool: return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -3825,7 +4064,7 @@ def impersonate_resource( request_url = "{}/{}/impersonation".format(self.resource_url(), resource_name) - logger.info( + logger.debug( "Impersonation settings for resource -> %s; calling -> %s", resource_name, request_url, @@ -3844,7 +4083,7 @@ def impersonate_resource( return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -3884,7 +4123,7 @@ def impersonate_oauth_client( request_url = "{}/{}/impersonation".format(self.oauth_client_url(), client_id) - logger.info( + logger.debug( "Impersonation settings for OAuth Client -> %s; calling -> %s", client_id, 
request_url, @@ -3903,7 +4142,7 @@ def impersonate_oauth_client( return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -3948,7 +4187,7 @@ def get_password_policy(self): request_url = "{}/passwordpolicy".format(self.config()["systemConfigUrl"]) - logger.info("Getting password policy; calling -> %s", request_url) + logger.debug("Getting password policy; calling -> %s", request_url) retries = 0 while True: @@ -3962,7 +4201,7 @@ def get_password_policy(self): return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: @@ -4009,7 +4248,7 @@ def update_password_policy(self, update_values: dict) -> bool: request_url = "{}/passwordpolicy".format(self.config()["systemConfigUrl"]) - logger.info( + logger.debug( "Update password policy with these new values -> %s; calling -> %s", update_values, request_url, @@ -4028,7 +4267,7 @@ def update_password_policy(self, update_values: dict) -> bool: return True # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(revalidate=True) retries += 1 else: diff --git a/pyxecm/otmm.py b/pyxecm/otmm.py new file mode 100644 index 0000000..b6fdede --- /dev/null +++ b/pyxecm/otmm.py @@ -0,0 +1,808 @@ +""" +OTMM Module to interact with the OpenText Media Management API +See: + +Class: OTMM +Methods: + +__init__ : class initializer +thread_wrapper : Wrap around threads to catch exceptions during execution +config : Returns config data set +get_data : Get the Data object that holds all processed Media Management base Assets +authenticate : Authenticates at the OTMM API +get_products : Get a dictionary with product names (keys) and IDs (values) +get_business_units : Get a dictionary with business unit names (keys) and IDs (values) +lookup_domains : Lookup domain values in a given OTMM domain +get_business_unit_assets : Get all Media Assets for a given Business Unit +get_product_assets : Get all Media Assets for a given product +download_asset : Download a given Media Asset +search_assets : Search assets based on the given search criteria +get_asset_metadata : Retrieve the metadata of an asset +load_assets : Load all Media Assets for Products and Business Units +load_assets_worker : Worker method for multi-threaded asset processing +""" + +__author__ = "Dr. Marc Diefenbruch" +__copyright__ = "Copyright 2024, OpenText" +__credits__ = ["Kai-Philip Gatzweiler"] +__maintainer__ = "Dr. Marc Diefenbruch" +__email__ = "mdiefenb@opentext.com" + +from json import JSONDecodeError +import os +import logging +import urllib.parse +import threading +import traceback + +import requests +from requests.exceptions import HTTPError, RequestException + +from pyxecm.helper.data import Data + +logger = logging.getLogger("pyxecm.otmm") + +REQUEST_HEADERS = {"Accept": "application/json", "Content-Type": "application/json"} + +REQUEST_TIMEOUT = 60 + +ASSET_BASE_PATH = "/tmp/mediaassets" + + +class OTMM: + """Used to retrieve and automate data extraction from OTMM.""" + + _config: dict + _access_token = None + _data: Data = None + _thread_number = 3 + _download_dir = "" + _business_unit_exclusions = None + _product_exclusions = None + + def __init__( + self, + base_url: str, + username: str, + password: str, + client_id: str, + client_secret: str, + thread_number: int, + download_dir: str, + business_unit_exclusions: list | None = None, + product_exclusions: list | None = None, + ): + + # Initialize otmm_config as an empty dictionary + otmm_config = {} + + # Store the credentials and parameters in a config dictionary: + otmm_config["baseUrl"] = base_url + otmm_config["username"] = username + otmm_config["password"] = password + otmm_config["clientId"] = client_id + otmm_config["clientSecret"] = client_secret + + otmm_config["restUrl"] = otmm_config["baseUrl"] + "/otmmapi/v6" + otmm_config["tokenUrl"] = otmm_config["restUrl"] + "/sessions/oauth2/token" + otmm_config["domainUrl"] = otmm_config["restUrl"] + "/lookupdomains" + otmm_config["assetsUrl"] = otmm_config["restUrl"] + "/assets" + otmm_config["searchUrl"] = otmm_config["restUrl"] + "/search/text" + + self._config = otmm_config + + self._session = requests.Session() + + self._data = Data() + + self._thread_number = thread_number + + self._download_dir = download_dir + + self._business_unit_exclusions = business_unit_exclusions + self._product_exclusions = product_exclusions + + # end method definition + + def thread_wrapper(self, target, *args, **kwargs): + """Function to wrap around threads to catch exceptions during execution""" + try: + target(*args, **kwargs) + except Exception as e: + thread_name = threading.current_thread().name + logger.error("Thread %s: failed with exception %s", thread_name, e) + logger.error(traceback.format_exc()) + + # end method definition + + def config(self) -> dict: + """Returns the configuration dictionary + + Returns: + dict: Configuration dictionary + """ + return self._config + + # end method definition + + def get_data(self) -> Data: + """Get the Data object that holds all processed Media Management base Assets + + Returns: + Data: Datastructure with all processed assets.
+ """ + + return self._data + + # end method definition + + def authenticate(self) -> str | None: + """Authenticate at OTMM with username / password and an OAuth client ID / client secret (OAuth2 password grant).""" + + request_url = self.config()["tokenUrl"] + headers = {"Content-Type": "application/x-www-form-urlencoded"} + payload = { + "username": self.config()["username"], + "password": self.config()["password"], + "grant_type": "password", + "client_id": self.config()["clientId"], + "client_secret": self.config()["clientSecret"], + } + + try: + response = self._session.post( + request_url, + headers=headers, + data=urllib.parse.urlencode(payload), + ) + response.raise_for_status() + + self._access_token = ( + response.json().get("token_info").get("oauth_token").get("accessToken") + ) + self._session.headers.update( + {"Authorization": f"Bearer {self._access_token}"} + ) + + return self._access_token + + except requests.exceptions.HTTPError as http_err: + logger.error("HTTP error occurred: %s", http_err) + except requests.exceptions.ConnectionError as conn_err: + logger.error("Connection error occurred: %s", conn_err) + except requests.exceptions.Timeout as timeout_err: + logger.error("Timeout error occurred: %s", timeout_err) + except requests.exceptions.RequestException as req_err: + logger.error("Request error occurred: %s", req_err) + except Exception as e: + logger.error("An unexpected error occurred: %s", e) + + return None + + # end method definition + + def get_products(self, domain: str = "OTMM.DOMAIN.OTM_PRODUCT") -> dict: + """Get a dictionary with product names (keys) and IDs (values) + + Args: + domain (str, optional): Domain. Defaults to "OTMM.DOMAIN.OTM_PRODUCT". + + Returns: + dict: Dictionary of all known products. + """ + + lookup_products = self.lookup_domains(domain) + + result = {} + for product in lookup_products: + result[product.get("display_value")] = product.get("field_value").get( + "value" + ) + + return result + + # end method definition + + def get_business_units( + self, domain: str = "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU" + ) -> dict: + """Get a dictionary with business unit names (keys) and IDs (values) + + Args: + domain (str, optional): Domain. Defaults to "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU". + + Returns: + dict: Dictionary of all known business units. + """ + + lookup_bus = self.lookup_domains(domain) + result = {} + for bu in lookup_bus: + result[bu.get("display_value")] = bu.get("field_value").get("value") + + return result + + # end method definition + + def lookup_domains(self, domain: str): + """Lookup domain values in a given OTMM domain + + Args: + domain (str): Name / identifier of the domain. + + Returns: + list: List of domain values of the given lookup domain. + """ + + request_url = self.config()["domainUrl"] + "/" + domain + + try: + response = self._session.get( + request_url, + ) + + response.raise_for_status() + + except requests.exceptions.HTTPError as http_err: + logger.error("HTTP error occurred: %s", http_err) + except requests.exceptions.ConnectionError as conn_err: + logger.error("Connection error occurred: %s", conn_err) + except requests.exceptions.Timeout as timeout_err: + logger.error("Timeout error occurred: %s", timeout_err) + except requests.exceptions.RequestException as req_err: + logger.error("Request error occurred: %s", req_err) + except Exception as e: + logger.error("An unexpected error occurred: %s", e) + + response = ( + response.json() + .get("lookup_domain_resource") + .get("lookup_domain") + .get("domainValues") + ) + + return response + + # end method definition + + def get_business_unit_assets( + self, bu_id: int, offset: int = 0, limit: int = 200 + ) -> list | None: + """Get all Media Assets for a given Business Unit (ID) that are NOT related to a product. + + Args: + bu_id (int): Identifier of the Business Unit. + offset (int, optional): Result pagination. Starting index. Defaults to 0. + limit (int, optional): Result pagination. Page length. Defaults to 200. + + Returns: + list | None: List of assets, or None if the search failed. + """ + + payload = { + "load_type": ["metadata"], + "load_multilingual_values": ["true"], + "level_of_detail": ["full"], + "after": offset, + "limit": limit, + "multilingual_language_code": ["en_US"], + "search_config_id": ["3"], + "preference_id": ["ARTESIA.PREFERENCE.GALLERYVIEW.DISPLAYED_FIELDS"], + "metadata_to_return": ["ARTESIA.FIELD.TAG"], + "facet_restriction_list": '{"facet_restriction_list":{"facet_field_restriction":[{"type":"com.artesia.search.facet.FacetSimpleFieldRestriction","facet_generation_behavior":"EXCLUDE","field_id":"PRODUCT_CHAR_ID","value_list":[null]}]}}', + "search_condition_list": [ + '{"search_condition_list":{"search_condition":[{"type":"com.artesia.search.SearchTabularCondition","metadata_table_id":"OTMM.FIELD.BUSINESS_UNIT.TAB","tabular_field_list":[{"type":"com.artesia.search.SearchTabularFieldCondition","metadata_field_id":"OTMM.COLUMN.BUSINESS_UNIT.TAB","relational_operator_id":"ARTESIA.OPERATOR.CHAR.CONTAINS","value":"' + + str(bu_id) + + '","left_paren":"(","right_paren":")"}]}]}}' + ], + } + + flattened_data = { + k: v if not isinstance(v, list) else ",".join(v) for k, v in payload.items() + } + + search_result = self.search_assets(flattened_data) + + if not search_result or "search_result_resource" not in search_result: + logger.error("No assets found via search!") + return None + search_result = search_result.get("search_result_resource") + + hits = search_result["search_result"]["hit_count"] + hits_total = search_result["search_result"]["total_hit_count"] + + asset_list = search_result.get("asset_list", None) + + hits_remaining = hits_total - hits + + while hits_remaining > 0: + flattened_data["after"] += hits + search_result = self.search_assets(flattened_data) + + if not search_result or "search_result_resource" not in search_result: + break + + search_result = search_result.get("search_result_resource") + + hits = search_result["search_result"]["hit_count"] + hits_remaining = hits_remaining - hits + + asset_list += search_result.get("asset_list", []) + + return asset_list + + # end method definition + + def get_product_assets( + self, product_id: int, offset: int = 0, limit: int = 200 + ) -> list | None: + """Get all Media Assets for a given
product (ID). + + Args: + product_id (int): Identifier of the product. + offset (int, optional): Result pagination. Starting index. Defaults to 0. + limit (int, optional): Result pagination. Page length. Defaults to 200. + + Returns: + list | None: List of assets, or None if the search failed. + """ + + payload = { + "load_type": ["metadata"], + "load_multilingual_values": ["true"], + "level_of_detail": ["full"], + "after": offset, + "limit": limit, + "multilingual_language_code": ["en_US"], + "search_config_id": ["3"], + "preference_id": ["ARTESIA.PREFERENCE.GALLERYVIEW.DISPLAYED_FIELDS"], + "metadata_to_return": ["ARTESIA.FIELD.TAG"], + "search_condition_list": [ + '{"search_condition_list":{"search_condition":[{"type":"com.artesia.search.SearchTabularCondition","metadata_table_id":"OTM.TABLE.PRODUCT_TABLE_FIELD","tabular_field_list":[{"type":"com.artesia.search.SearchTabularFieldCondition","metadata_field_id":"PRODUCT_CHAR_ID","relational_operator_id":"ARTESIA.OPERATOR.CHAR.CONTAINS","value":"' + + str(product_id) + + '","left_paren":"(","right_paren":")"}]}]}}' + ], + } + + flattened_data = { + k: v if not isinstance(v, list) else ",".join(v) for k, v in payload.items() + } + + search_result = self.search_assets(flattened_data) + + if not search_result or "search_result_resource" not in search_result: + logger.error("No assets found via search!") + return None + search_result = search_result.get("search_result_resource") + + hits = search_result["search_result"]["hit_count"] + hits_total = search_result["search_result"]["total_hit_count"] + + asset_list = search_result.get("asset_list", None) + + hits_remaining = hits_total - hits + + while hits_remaining > 0: + flattened_data["after"] += hits + search_result = self.search_assets(flattened_data) + + if not search_result or "search_result_resource" not in search_result: + break + + search_result = search_result.get("search_result_resource") + + hits = search_result["search_result"]["hit_count"] + hits_remaining = hits_remaining - hits + + asset_list += search_result.get("asset_list", []) + + return asset_list + + # end method definition + + def download_asset( + self, + asset_id: str, + asset_name: str, + download_url: str = "", + skip_existing: bool = True, + ) -> bool: + """Download a given Media Asset + + Args: + asset_id (str): ID of the asset to download + asset_name (str): Name of the asset - becomes the file name. + download_url (str, optional): URL to download the asset. + skip_existing (bool, optional): Skip the download if the file already exists. Defaults to True. + + Returns: + bool: True = success, False = failure + """ + # url = f"{self.base_url}/assets/v1/{asset_id}/download" + + if download_url: + request_url = download_url + else: + request_url = self.config()["assetsUrl"] + "/" + asset_id + "/contents" + + file_name = os.path.join(self._download_dir, asset_id) + + if os.path.exists(file_name): + if skip_existing: + logger.debug( + "OpenText Media Management asset has been downloaded before - skipping download -> '%s' (%s) to -> %s...", + asset_name, + asset_id, + file_name, + ) + return True + else: + logger.debug( + "OpenText Media Management asset has been downloaded before. Update download -> '%s' (%s) to -> %s...", + asset_name, + asset_id, + file_name, + ) + os.remove(file_name) + + try: + if not os.path.exists(self._download_dir): + # Create the directory + os.makedirs(self._download_dir) + + logger.info( + "Downloading OpenText Media Management asset -> '%s' (%s) to -> %s...", + asset_name, + asset_id, + file_name, + ) + response = self._session.get(request_url, stream=True) + response.raise_for_status() + with open(file_name, "wb") as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + return True + except HTTPError as http_err: + logger.error("HTTP error occurred -> %s!", str(http_err)) + except RequestException as req_err: + logger.error("Request error occurred -> %s!", str(req_err)) + except Exception as err: + logger.error("An error occurred -> %s!", str(err)) + + return False + + # end method definition + + def search_assets(self, payload: dict): + """Search assets based on the given parameters / search criteria. + + Args: + payload (dict): in the format of: + payload = { + "PRODUCT_CHAR_ID": "Extended ECM for Engineering", + "BUSINESS_AREA_CHAR_ID": "Content", + "keyword_query": "*", + "limit": "5", + } + + Returns: + dict: JSON search results + """ + + request_url = self.config()["searchUrl"] + + headers = {"Content-Type": "application/x-www-form-urlencoded"} + + encoded_payload = urllib.parse.urlencode(payload, safe="/:") + + try: + response = self._session.post( + request_url, + headers=headers, + data=encoded_payload, + ) + + response.raise_for_status() + + except requests.exceptions.HTTPError as http_err: + logger.error("HTTP error occurred: %s", http_err) + except requests.exceptions.ConnectionError as conn_err: + logger.error("Connection error occurred: %s", conn_err) + except requests.exceptions.Timeout as timeout_err: + logger.error("Timeout error occurred: %s", timeout_err) + except requests.exceptions.RequestException as req_err: + logger.error("Request error occurred: %s", req_err) + except Exception as e: + logger.error("An unexpected error occurred: %s", e) + + return response.json() + + # end method definition + + def get_asset_metadata(self, asset_id: str) -> dict: + """Retrieve the metadata of an asset.
+ + Args: + asset_id (str): ID of the asset to query + + Returns: + dict: Metadata information as dict with values as list + + example: + { + 'OTMM.CUSTOM.FIELD_TITLE': [], + 'OTMM.CUSTOM.FIELD_DESCRIPTION': [], + 'OTMM.CUSTOM.FIELD_KEYWORDS': [], + 'CONTENT_TYPE_COMBO_CHAR_ID': [], + 'OTM.TABLE.APPROVED_USAGE_FIELD': [], + 'OTMM.FIELD.RESOURCE_LIBRARY.TAB': [], + 'LANGUAGE_COMBO_CHAR_ID': [], + 'OTMM.CUSTOM.FIELD_PART_NUMBER': [], + 'OTMM.FIELD.BUSINESS_UNIT.TAB': ['Content'], + 'OTM.TABLE.PRODUCT_TABLE_FIELD': ['Vendor Invoice Management for SAP'], + 'OTM.TABLE.INDUSTRY_TABLE_FIELD': [], + 'OTMM.CUSTOM.FIELD_URL': [], + 'OTMM.CUSTOM.FIELD_PREVIOUS_URL': [], + 'OTMM.CUSTOM.FIELD_CONTENT_OWNER': [], + 'OTMM.CUSTOM.FIELD_EMAIL': [], + 'OTMM.CUSTOM.FIELD_JOB_NUMBER': [], + 'OTM.TABLE.BUSINESS_AREA_TABLE_FIELD': [], + 'OTM.TABLE.JOURNEY_TABLE_FIELD': ['Buy', 'Try', 'Learn'], + 'OTMM.FIELD.PERSONA.TAB': [], + 'OTMM.FIELD.SERVICES.TAB': [], + 'OTMM.FIELD.REGION.TAB': [], + 'OTMM.FIELD.PURPOSE.TAB': [], + 'AODA_CHAR_ID': [], + 'REVIEW_CADENCE_CHAR_ID': [], + 'CONTENT_CREATED_DATE_ID': [], + 'ARTESIA.FIELD.EXPIRATION DATE': [], + 'OTMM.CUSTOM.FIELD_REAL_COMMENTS': [] + } + """ + + request_url = self.config()["assetsUrl"] + f"/{asset_id}" + headers = {"Content-Type": "application/x-www-form-urlencoded"} + + params = { + "load_type": "custom", + "level_of_detail": "slim", + "data_load_request": '{"data_load_request":{"load_multilingual_field_values":"true","load_subscribed_to":"true","load_asset_content_info":"true","load_metadata":"true","load_inherited_metadata":"true","load_thumbnail_info":"true","load_preview_info":"true", "load_pdf_preview_info":"true", "load_3d_preview_info" : "true","load_destination_links":"true", "load_security_policies":"true","load_path":"true","load_deep_zoom_info":"true"}}', + } + + try: + response = self._session.get(request_url, headers=headers, params=params) + + response.raise_for_status() + + except requests.exceptions.HTTPError as http_err: + logger.error("HTTP error occurred: %s", http_err) + except requests.exceptions.ConnectionError as conn_err: + logger.error("Connection error occurred: %s", conn_err) + except requests.exceptions.Timeout as timeout_err: + logger.error("Timeout error occurred: %s", timeout_err) + except requests.exceptions.RequestException as req_err: + logger.error("Request error occurred: %s", req_err) + except Exception as e: + logger.error("An unexpected error occurred: %s", e) + + # Read Metadata from nested structure + try: + metadata = ( + response.json() + .get("asset_resource", {}) + .get("asset", {}) + .get("metadata", {}) + .get("metadata_element_list", [])[0] + .get("metadata_element_list", []) + ) + except JSONDecodeError: + logger.error("Cannot decode JSON response for asset_id -> %s", asset_id) + return {} + + # Generate empty result dict + result = {} + + # Extract Metadata fields with values as list + for data in metadata: + index = data.get("id").replace(" ", "").replace(".", "_") + + try: + result[index] = data.get("value").get("value").get("value") + except AttributeError: + + infos = [] + for element in data.get("metadata_element_list", []): + for value in element.get("values", []): + infos.append(value.get("value").get("display_value")) + + result[index] = infos + return result + + # end method definition + + def load_assets( + self, + load_products: bool = True, + load_business_units: bool = True, + download_assets: bool = True, + ) -> bool: + """Load all Media Assets for Products and Business Units + + Returns: + bool: True = Success, False = Failure + """ + + asset_list = [] + + if load_products: + + products = self.get_products()  # dictionary with key = name and value = ID + + if self._product_exclusions: + logger.info("Excluding products -> %s", str(self._product_exclusions)) + for key in self._product_exclusions: + products.pop( + key, None + )  # pop(key, None) will remove the key if it exists, and do nothing if it doesn't + + for product_name, product_id in products.items(): + if "DO NOT USE" in product_name: + continue + + logger.info("Processing product -> '%s'...", product_name) + + assets = self.get_product_assets(product_id) + + if not assets: + logger.info("Found no assets for product -> '%s'", product_name) + continue + + for asset in assets: + asset["workspace_type"] = "Product" + asset["workspace_name"] = product_name + + asset_list += assets + + if load_business_units: + + business_units = self.get_business_units() + + if self._business_unit_exclusions: + logger.info( + "Excluding business units -> %s", + str(self._business_unit_exclusions), + ) + for key in self._business_unit_exclusions: + business_units.pop( + key, None + )  # pop(key, None) will remove the key if it exists, and do nothing if it doesn't + + for bu_name, bu_id in business_units.items(): + logger.debug("Processing business unit -> '%s'...", bu_name) + assets = self.get_business_unit_assets(bu_id) + + if not assets: + logger.info("Found no assets for business unit -> '%s'", bu_name) + continue + + for asset in assets: + asset["workspace_type"] = "Business Unit" + asset["workspace_name"] = bu_name + + asset_list += assets + + # Filter out deleted and expired assets before partitioning the work: + asset_list = [ + item + for item in asset_list + if not item.get("deleted", False) and not item.get("expired", False) + ] + + total_count = len(asset_list) + + number = self._thread_number + + if total_count >= number: + partition_size = total_count // number + remainder = total_count % number + else: + partition_size = total_count + number = 1 + remainder = 0 + + logger.info( + "Processing -> %s Media Assets, thread number -> %s, partition size -> %s", + str(total_count), + number, + partition_size, + ) + + threads = [] + + start = 0 + for index in range(number): + extra = 1 if remainder > 0 else 0 + end = start + partition_size + extra + if remainder > 0: + remainder -= 1 + + thread = threading.Thread( + name=f"load_assets_{index + 1:02}", + target=self.thread_wrapper, + args=( + self.load_assets_worker, + asset_list, + partition_size + extra, + start, + download_assets, + ), + ) + thread.start() + threads.append(thread) + start = end + + for thread in threads: + thread.join() + + return True + + # end method definition + + def load_assets_worker( + self, + asset_list: list, + partition_size: int, + offset: int = 0, + download_assets: bool = True, + ): + """Worker method for multi-threaded asset processing + + Args: + asset_list (list): List of assets to process + partition_size (int): Number of assets this worker processes + offset (int, optional): Starting index in asset_list. Defaults to 0. + download_assets (bool, optional): Whether to download the assets. Defaults to True. + """ + + logger.info( + "Processing Media Assets in range from -> %s to -> %s...", + offset, + offset + partition_size, + ) + + worker_asset_list = asset_list[offset : offset + partition_size] + + for asset in worker_asset_list: + asset_id = asset.get("asset_id") + asset_name = asset.get("name") + # Store name as asset_name + asset["asset_name"] = asset_name + asset_download_url = asset.get("delivery_service_url") + asset_deleted = asset.get("deleted", False) + asset_expired = asset.get("expired", False) + if asset_deleted or asset_expired: + logger.info( + "Asset -> '%s' is deleted or expired.
Skipping...", + asset_name, + ) + continue + + if download_assets: + success = self.download_asset( + asset_id=asset_id, + asset_name=asset_name, + download_url=asset_download_url, + ) + if not success: + logger.error( + "Failed to download asset -> '%s' (%s) to '%s'", + asset_name, + asset_id, + self._download_dir, + ) + else: + logger.info( + "Successfully downloaded asset -> '%s' (%s) to '%s'", + asset_name, + asset_id, + self._download_dir, + ) + + # Add the metadata to the asset: + asset.update(self.get_asset_metadata(asset_id)) + + # Now we add the assets to the Pandas Data Frame in the Data class: + with self._data.lock(): + self._data.append(worker_asset_list) diff --git a/pyxecm/otpd.py b/pyxecm/otpd.py index 9acf400..7b9ceb8 100644 --- a/pyxecm/otpd.py +++ b/pyxecm/otpd.py @@ -248,18 +248,18 @@ def authenticate(self, revalidate: bool = False) -> dict: # Step2: fetch session id from the response, and hit j_security_check with proper authentication # Step3: get session id from the response, add to self. It can be used for other transactions session = requests.Session() - logger.info("Initiating dummy rest call to Tomcat to get initial session id") + logger.debug("Initiating dummy rest call to Tomcat to get initial session id") response = session.put(request_url, json=payload) logger.info(response.text) if response.ok: - logger.info("Url to authenticate Tomcat for Session id -> %s", auth_url) + logger.debug("Url to authenticate Tomcat for Session id -> %s", auth_url) session_response = session.post(auth_url) if session_response.ok: - logger.info( + logger.debug( "Response for -> %s is -> %s", auth_url, str(session_response) ) session_dict = session.cookies.get_dict() - logger.info( + logger.debug( "Session id to perform Rest API calls to Tomcat -> %s", session_dict["JSESSIONID"], ) @@ -296,7 +296,7 @@ def import_database(self, filename: str): request_url = self.config()["otpdImportDatabaseUrl"] logger.info( - "Importing Database backup -> %s, into PowerDocs ServerManager on -> %s", + "Importing PowerDocs database backup -> %s, into PowerDocs ServerManager on -> %s", filename, request_url, ) @@ -308,7 +308,7 @@ def import_database(self, filename: str): return response else: logger.error( - "Failed to Import Database backup -> %s into -> %s; error -> %s", + "Failed to import PowerDocs database backup -> %s into -> %s; error -> %s", filename, request_url, response.text, ) @@ -340,10 +340,11 @@ def apply_setting( request_url = self.config()["settingsUrl"] - logger.info( - "Update setting -> %s with value -> %s; calling -> %s", + logger.debug( + "Update PowerDocs setting -> %s with value -> %s (tenant -> %s); calling -> %s", setting_name, setting_value, + tenant_name, request_url, ) @@ -363,13 +364,15 @@ def apply_setting( return self.parse_request_response(response) # Check if Session has expired - then re-authenticate and try once more elif response.status_code == 401 and retries == 0: - logger.warning("Session has expired - try to re-authenticate...") + logger.debug("Session has expired - try to re-authenticate...") self.authenticate(True) retries += 1 else: logger.error( - "Failed to update setting -> %s; error -> %s", + "Failed to update PowerDocs setting -> %s with value -> %s (tenant -> %s); error -> %s", setting_name, + setting_value, + tenant_name, response.text, ) return None diff --git a/requirements.txt b/requirements.txt index 0eedac8..4bda637 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,7 @@ pymdown-extensions requests requests_toolbelt
+xmltodict setuptools kubernetes zipfile36
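A minimal usage sketch for the new `OTMM` class introduced in `pyxecm/otmm.py` above, assuming a reachable OTMM instance. The endpoint URL, credentials, and secrets below are placeholder assumptions, not values from the library:

```python
from pyxecm.otmm import OTMM

# Placeholder connection parameters - adjust to the target OTMM deployment:
otmm = OTMM(
    base_url="https://otmm.example.com",  # hypothetical OTMM endpoint
    username="admin",
    password="********",
    client_id="otmm-oauth-client",  # hypothetical OAuth client
    client_secret="********",
    thread_number=3,
    download_dir="/tmp/mediaassets",
)

# authenticate() returns the OAuth access token, or None on failure:
if otmm.authenticate():
    # Collect product and business unit assets (multi-threaded);
    # pass download_assets=False to gather metadata only:
    otmm.load_assets(
        load_products=True,
        load_business_units=True,
        download_assets=False,
    )
    # All processed assets end up in the shared Data object:
    data = otmm.get_data()
```

The `thread_number` parameter controls how many worker threads the asset list is partitioned across; each worker appends its slice to the shared Pandas-backed `Data` object under a lock.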