diff --git a/nova/conf/vmware.py b/nova/conf/vmware.py
index 5aeb0e3a6f9..0bd0bfef9c7 100644
--- a/nova/conf/vmware.py
+++ b/nova/conf/vmware.py
@@ -515,6 +515,34 @@ The sync-loop thread for custom traits runs continuously,
 sleeping after syncing the traits. This setting defines how long to sleep
 between runs.
 
+Possible values:
+ * integer >= 0: time in seconds to sleep between runs
+ * integer < 0: disable the sync-loop
+"""),
+    cfg.IntOpt('external_customer_drs_rules_sync_loop_spacing',
+               default=1800,
+               help="""
+Amount of time in seconds to wait between external customer DRS rules
+sync-loop runs
+
+The sync-loop thread for external customer DRS rules runs continuously,
+sleeping after ensuring all necessary DRS rules exist and cleaning up
+no-longer-necessary ones. This setting defines how long to sleep between runs.
+
+Possible values:
+ * integer >= 0: time in seconds to sleep between runs
+ * integer < 0: disable the sync-loop
+"""),
+    cfg.IntOpt('external_customer_vm_groups_sync_loop_spacing',
+               default=1800,
+               help="""
+Amount of time in seconds to wait between external customer VmGroup
+sync-loop runs
+
+The sync-loop thread for external customer VmGroups runs continuously,
+sleeping after updating the members of the VmGroups found in the cluster that
+match a certain prefix. This setting defines how long to sleep between runs.
+
 Possible values:
  * integer >= time in seconds to sleep between runs
  * intger < 0: disable the sync-loop
diff --git a/nova/utils.py b/nova/utils.py
index efeef40a763..a1352606eb7 100644
--- a/nova/utils.py
+++ b/nova/utils.py
@@ -94,6 +94,7 @@
 BIGVM_EXCLUSIVE_TRAIT = 'CUSTOM_HANA_EXCLUSIVE_HOST'
 
 EXTERNAL_CUSTOMER_SUPPORTED_TRAIT = 'CUSTOM_EXTERNAL_CUSTOMER_SUPPORTED'
+EXTERNAL_CUSTOMER_DOMAIN_TRAIT = 'CUSTOM_EXTERNAL_CUSTOMER_{}'
 
 _FILE_CACHE = {}
 
diff --git a/nova/virt/vmwareapi/cluster_util.py b/nova/virt/vmwareapi/cluster_util.py
index c4d6749e00a..dad48c248af 100644
--- a/nova/virt/vmwareapi/cluster_util.py
+++ b/nova/virt/vmwareapi/cluster_util.py
@@ -20,6 +20,7 @@
 from nova import exception
 from nova.i18n import _
 from nova import utils
+from nova.virt.vmwareapi import error_util
 
 LOG = logging.getLogger(__name__)
 
@@ -220,6 +221,35 @@ def update_vm_group_membership(session, cluster, vm_group_name, vm_ref,
     reconfigure_cluster(session, cluster, config_spec)
 
 
+def set_vm_group_members(session, cluster_ref, vm_group_name, vm_refs):
+    """Replace the current members of the VmGroup with the given list
+
+    The VmGroup has to exist.
+    """
+
+    @utils.synchronized(f"set-vm-group-members-{vm_group_name}")
+    def _set_vm_group_members(session, cluster_ref, vm_group_name, vm_refs):
+        """Local function so the lock name includes the VmGroup name"""
+        cluster_config = session._call_method(
+            vim_util, "get_object_property", cluster_ref, "configurationEx")
+
+        client_factory = session.vim.client.factory
+        config_spec = client_factory.create('ns0:ClusterConfigSpecEx')
+
+        group = _get_vm_group(cluster_config, vm_group_name)
+        if not group:
+            raise error_util.VmGroupDoesNotExist(vm_group_name=vm_group_name)
+
+        group.vm = vm_refs
+
+        group_spec = create_group_spec(client_factory, group, 'edit')
+        config_spec.groupSpec = [group_spec]
+
+        reconfigure_cluster(session, cluster_ref, config_spec)
+
+    return _set_vm_group_members(session, cluster_ref, vm_group_name, vm_refs)
+
+
 def create_vm_rule(client_factory, name, vm_refs, policy='affinity',
                    rule=None):
     """Create a ClusterAffinityRuleSpec or ClusterAntiAffinityRuleSpec object
@@ -249,6 +279,29 @@ def create_vm_rule(client_factory, name, vm_refs, policy='affinity',
     return rule
 
 
+def create_vm_host_rule(client_factory, name, host_group_name, vm_group_name,
+                        policy='affinity', mandatory=True):
+    """Create a ClusterVmHostRuleInfo object
+
+    This rule defines anti-/affinity between a VmGroup and a HostGroup.
+    """
+    rule = client_factory.create('ns0:ClusterVmHostRuleInfo')
+    rule.name = name
+    rule.enabled = True
+    rule.mandatory = mandatory
+    rule.vmGroupName = vm_group_name
+
+    if policy == 'affinity':
+        rule.affineHostGroupName = host_group_name
+    elif policy == 'anti-affinity':
+        rule.antiAffineHostGroupName = host_group_name
+    else:
+        msg = _('%s policy is not supported.') % policy
+        raise exception.ValidationError(msg)
+
+    return rule
+
+
 def create_rule_spec(client_factory, rule, operation='add'):
     """Create a ClusterRuleSpec object"""
     rule_spec = client_factory.create('ns0:ClusterRuleSpec')
@@ -271,18 +324,8 @@ def _create_cluster_rules_spec(client_factory, name, vm_refs,
 def _create_cluster_group_rules_spec(client_factory, name, vm_group_name,
                                      host_group_name, policy='affinity',
                                      rule=None):
-    rules_info = client_factory.create('ns0:ClusterVmHostRuleInfo')
-    rules_info.name = name
-    rules_info.enabled = True
-    rules_info.mandatory = True
-    rules_info.vmGroupName = vm_group_name
-    if policy == 'affinity':
-        rules_info.affineHostGroupName = host_group_name
-    elif policy == 'anti-affinity':
-        rules_info.antiAffineHostGroupName = host_group_name
-    else:
-        msg = _('%s policy is not supported.') % policy
-        raise exception.ValidationError(msg)
+    rules_info = create_vm_host_rule(
+        client_factory, name, host_group_name, vm_group_name, policy)
 
     if rule is not None:
         rules_info.key = rule.key
diff --git a/nova/virt/vmwareapi/constants.py b/nova/virt/vmwareapi/constants.py
index e1c92aa8434..4f693ee1b4f 100644
--- a/nova/virt/vmwareapi/constants.py
+++ b/nova/virt/vmwareapi/constants.py
@@ -249,6 +249,8 @@
 # distinguish between what has been automatically created and what is
 # admin-created.
DRS_PREFIX = 'NOVA_' +# DRS rule prefix for external customer HostGroup affinity rules +DRS_EXT_CUSTOMER_PREFIX = f"{DRS_PREFIX}external_customer_" # Prefix for custom attributes (CustomFieldDef names) that's used by Nova to diff --git a/nova/virt/vmwareapi/driver.py b/nova/virt/vmwareapi/driver.py index bb92c587f99..ff7533c5d8d 100644 --- a/nova/virt/vmwareapi/driver.py +++ b/nova/virt/vmwareapi/driver.py @@ -260,6 +260,8 @@ def init_host(self, host): LOG.debug("Starting green server-group sync-loop thread") utils.spawn(self._server_group_sync_loop, host) utils.spawn(self._custom_traits_sync_loop, host) + utils.spawn(self._external_customer_drs_rules_sync_loop) + utils.spawn(self._external_customer_vm_groups_sync_loop) def get_nodenames_by_uuid(self, refresh=False): """Overwritten method to return a locally-cached node UUID @@ -611,6 +613,40 @@ def update_provider_tree(self, provider_tree, nodename, allocations=None): # where cpu traits are added. In the vmware world, this is where we # would add nested providers representing tenant VDC and similar. + self._update_provider_tree_for_external_customers( + provider_tree, nodename, stats['hostgroups']) + + def _update_provider_tree_for_external_customers(self, provider_tree, + nodename, hostgroups): + existing_traits = { + t for t in provider_tree.data(nodename).traits + if t.startswith(tuple( + utils.EXTERNAL_CUSTOMER_DOMAIN_TRAIT.format( + prefix.upper().removesuffix('-').replace('-', '_')) + for prefix in CONF.external_customer_domain_name_prefixes)) or + t == utils.EXTERNAL_CUSTOMER_SUPPORTED_TRAIT} + + wanted_traits = set() + # we can have generic host groups and special ones per domain + for prefix in CONF.external_customer_domain_name_prefixes: + hg_prefix = prefix.removesuffix('-') + + for hg_name in hostgroups: + if not hg_name.startswith(hg_prefix): + continue + + trait = utils.EXTERNAL_CUSTOMER_DOMAIN_TRAIT.format( + hg_name.upper().replace('-', '_')) + wanted_traits.add(trait) + + if wanted_traits: + wanted_traits.add(utils.EXTERNAL_CUSTOMER_SUPPORTED_TRAIT) + + traits_to_add = wanted_traits - existing_traits + provider_tree.add_traits(nodename, *traits_to_add) + traits_to_remove = existing_traits - wanted_traits + provider_tree.remove_traits(nodename, *traits_to_remove) + def prepare_for_spawn(self, instance): """Perform pre-checks for spawn.""" self._vmops.prepare_for_spawn(instance) @@ -981,6 +1017,61 @@ def _custom_traits_sync_loop(self, compute_host): time.sleep(CONF.vmware.custom_traits_sync_loop_spacing) + def _external_customer_drs_rules_sync_loop(self): + """Clean up and validate DRS rules for external customers HostGroups + + While during normal operations we already ensure that DRS rules get + created appropriately, we do not clean up no-longer-necessary ones. + This sync loop will ensure DRS rules and clean up, too. + """ + # we create a context here, so we can follow the logs for the sync-loop + # more easily + context = nova_context.get_admin_context() # noqa:F841 + + while CONF.vmware.external_customer_drs_rules_sync_loop_spacing >= 0: + LOG.debug('Starting external customer DRS rules sync-loop') + try: + self._vmops.sync_external_customer_drs_rules(do_cleanup=True) + except Exception as e: + LOG.exception("Finished external customer DRS rules sync-loop " + "with error: %s", e) + else: + LOG.debug('Finished external customer DRS rules sync-loop') + + time.sleep(CONF. 
+ vmware.external_customer_drs_rules_sync_loop_spacing) + + def _external_customer_vm_groups_sync_loop(self): + """Sync the instances of this host into members for VmGroups + + We pick the list of existing VmGroup objects from the cluster and sync + the members for them. _external_customer_drs_rules_sync_loop() takes + care of creating/deleting VmGroup objects for existing HostGroups. + """ + context = nova_context.get_admin_context() + + while CONF.vmware.external_customer_vm_groups_sync_loop_spacing >= 0: + LOG.debug('Starting external customer VmGroup sync-loop') + try: + drs_prefix = constants.DRS_EXT_CUSTOMER_PREFIX + + vm_groups = cluster_util.fetch_cluster_groups( + self._session, self._cluster_ref, group_type='vm') + vm_group_names = [vm_group_name for vm_group_name in vm_groups + if vm_group_name.startswith(drs_prefix)] + + for vm_group_name in vm_group_names: + self._vmops.sync_external_customer_vm_group(context, + vm_group_name) + except Exception as e: + LOG.exception("Finished external customer VmGroup sync-loop " + "with error: %s", e) + else: + LOG.debug('Finished external customer VmGroup sync-loop') + + time.sleep(CONF. + vmware.external_customer_vm_groups_sync_loop_spacing) + def check_can_live_migrate_destination(self, context, instance, src_compute_info, dst_compute_info, block_migration=False, diff --git a/nova/virt/vmwareapi/error_util.py b/nova/virt/vmwareapi/error_util.py index b6c0d30c351..b1e297b621a 100644 --- a/nova/virt/vmwareapi/error_util.py +++ b/nova/virt/vmwareapi/error_util.py @@ -54,3 +54,7 @@ class EvcModeDoesNotExist(exception.Invalid): class InClustervMotionCheckError(exception.Invalid): msg_fmt = _("in cluster migration failed: %(reason)s") + + +class VmGroupDoesNotExist(exception.Invalid): + msg_fmt = _("VmGroup %(vm_group_name)s does not exist") diff --git a/nova/virt/vmwareapi/host.py b/nova/virt/vmwareapi/host.py index 6de474c51f4..76ce144e453 100644 --- a/nova/virt/vmwareapi/host.py +++ b/nova/virt/vmwareapi/host.py @@ -22,12 +22,14 @@ from oslo_utils import units from oslo_utils import versionutils from oslo_vmware import exceptions as vexc +from oslo_vmware import vim_util as vutil import nova.conf from nova import context from nova import exception from nova import objects from nova.objects import fields as obj_fields +from nova.virt.vmwareapi import cluster_util from nova.virt.vmwareapi import ds_util from nova.virt.vmwareapi import vim_util from nova.virt.vmwareapi import vm_util @@ -98,6 +100,9 @@ def update_status(self): # Get cpu, memory stats from the cluster per_host_stats = vm_util.get_stats_from_cluster_per_host( self._session, self._cluster) + + cluster_hostgroups = cluster_util.fetch_cluster_groups( + self._session, self._cluster, group_type="host") except (vexc.VimConnectionException, vexc.VimAttributeException) as ex: # VimAttributeException is thrown when vpxd service is down LOG.warning("Failed to connect with %(node)s. 
" @@ -133,11 +138,30 @@ def update_status(self): data[self._cluster_node_name] = self._merge_stats( self._cluster_node_name, cluster_stats, defaults) + data[self._cluster_node_name]['hostgroups'] = {} + for hg in cluster_hostgroups.values(): + # ignore empty hostgroups + if not getattr(hg, 'host', None): + continue + + data[self._cluster_node_name]['hostgroups'][hg.name] = [ + vutil.get_moref_value(h_ref) for h_ref in hg.host] + for h_ref in hg.host: + host_ref_value = vutil.get_moref_value(h_ref) + if host_ref_value not in per_host_stats: + continue + h_name = per_host_stats[host_ref_value]["name"] + data[h_name].setdefault('hostgroups', []).append(hg.name) + self._stats = data if self._auto_service_disabled: self._set_host_enabled(True) return data + @property + def hostgroups(self): + return self._stats[self._cluster_node_name].get('hostgroups', {}) + def _merge_stats(self, host, stats, defaults): result = deepcopy(defaults) result["hypervisor_hostname"] = host diff --git a/nova/virt/vmwareapi/vmops.py b/nova/virt/vmwareapi/vmops.py index 4fcd03182d7..20d08cb2cea 100644 --- a/nova/virt/vmwareapi/vmops.py +++ b/nova/virt/vmwareapi/vmops.py @@ -19,6 +19,7 @@ Class for VM tasks like spawn, snapshot, suspend, resume etc. """ +from collections import defaultdict import contextlib import copy import itertools @@ -1241,6 +1242,8 @@ def prepare_for_spawn(self, instance): def update_cluster_placement(self, context, instance, remove=False): self.sync_instance_server_group(context, instance) self.update_admin_vm_group_membership(instance, remove=remove) + self.update_external_customer_placement(context, instance, + remove=remove) def sync_instance_server_group(self, context, instance): try: @@ -1273,6 +1276,233 @@ def update_admin_vm_group_membership(self, instance, remove=False): vm_group_name, vm_ref, remove=remove) + def _get_external_customer_vm_group_for_instance(self, instance, + hostgroups): + """Return the VmGroup name for the specific instance""" + domain_name = instance.system_metadata['domain_name'] + prefixes = tuple(CONF.external_customer_domain_name_prefixes) + drs_prefix = constants.DRS_EXT_CUSTOMER_PREFIX + + vm_group_name = None + if domain_name in hostgroups: + vm_group_name = f"{drs_prefix}{domain_name}" + elif not domain_name.startswith(prefixes): + vm_group_name = f"{drs_prefix}internal_workload" + else: + prefixes_and_hg_names = [(prefix, prefix.removesuffix('-')) + for prefix in prefixes] + for prefix, hg_name in prefixes_and_hg_names: + if not domain_name.startswith(prefix): + continue + + if hg_name not in hostgroups: + continue + + vm_group_name = f"{drs_prefix}{hg_name}" + break + else: + LOG.error("Could not find hostgroup for instance %s in " + "domain %s in the existing hostgroups %s", + instance.uuid, domain_name, hostgroups) + + return vm_group_name + + def _get_instances_by_external_customer_vm_group(self, context): + """Get Instances assigned to this host grouped by VmGroup name""" + InstanceList = objects.instance.InstanceList + filters = {'host': self._compute_host, 'deleted': False} + instances = InstanceList.get_by_filters( + context, filters, expected_attrs=['system_metadata']) + + hostgroups = self._vc_state.hostgroups + + grouped_instances = defaultdict(list) + for instance in instances: + vm_group_name = self._get_external_customer_vm_group_for_instance( + instance, hostgroups) + grouped_instances[vm_group_name].append(instance.uuid) + + return grouped_instances + + def update_external_customer_placement(self, context, instance, + remove=False): + 
"""Ensure DRS rules and VmGroup assignment for the instance""" + hg_prefixes = tuple(prefix.removesuffix('-') + for prefix in CONF.external_customer_domain_name_prefixes) + hostgroups = [hg_name for hg_name in self._vc_state.hostgroups + if hg_name.startswith(hg_prefixes)] + + if not hostgroups: + # This cluster does not manage external customer VMs. + return + + self.sync_external_customer_drs_rules() + + vm_group_name = self._get_external_customer_vm_group_for_instance( + instance, hostgroups) + if vm_group_name is None: + if remove: + # For removals we do not require a VmGroup because the VM would + # not be managed by the cluster soon anyways. + return + # FIXME build a VMware exception for this + raise exception.NovaException('') + self.sync_external_customer_vm_group(context, vm_group_name) + + def sync_external_customer_vm_group(self, context, vm_group_name): + LOG.debug('Starting sync for external customer VmGroup %s', + vm_group_name) + + @utils.synchronized("vmware-external-customer-vm-group-" + f"{vm_group_name}") + def _sync_vm_group(context, vm_group_name): + vg_instances = \ + self._get_instances_by_external_customer_vm_group(context) + if vm_group_name not in vg_instances: + LOG.info("Sync for external customer VmGroup %s done: " + "No instances assigned.", vm_group_name) + return + + instance_uuids = vg_instances[vm_group_name] + + expected_members = self._filter_instances_for_drs( + context, instance_uuids, vm_group_name=vm_group_name) + if not expected_members: + LOG.info("Sync for external customer VmGroup %s done: " + "No expected members.", vm_group_name) + return + + LOG.debug('Updating external customer VmGroup %s with %s members', + vm_group_name, len(expected_members)) + cluster_util.set_vm_group_members( + self._session, self._cluster, vm_group_name, + list(expected_members.values())) + + LOG.debug('Sync for external customer VmGroup %s done', + vm_group_name) + + _sync_vm_group(context, vm_group_name) + + @utils.synchronized('sync-external-customers-drs-rules') + def sync_external_customer_drs_rules(self, do_cleanup=True): + """Ensure DRS rules for anti-/affinity to HostGroups exist + + We delete any DRS rules matching the naming scheme but referencing a + non-existing HostGroup to keep the cluster clean. + + For each HostGroup there's are 2 rules. Configured with affinity to the + HostGroup, we have a rule referencing a VmGroup named after the + HostGroup. To keep internal workload away from the HostGroup, a second + rule configured with anti-affinity referencing the pre-defined internal + workload VmGroup exists. + + :param:do_cleanup: If set to `False`, no remove operations for groups + and rules are executed. Can be useful outside the + sync-loop to reduce possible cluster + reconfiguration calls which can hang on the cluster + lock waiting for vCenter-external live-migrations. 
+ """ + LOG.debug("Syncing external customer DRS rules") + drs_prefix = constants.DRS_EXT_CUSTOMER_PREFIX + + client_factory = self._session.vim.client.factory + config_spec = client_factory.create('ns0:ClusterConfigSpecEx') + config_spec.groupSpec = [] + config_spec.rulesSpec = [] + + # fetch the existing Group objects and split them into HostGroup and + # VmGroup objects, filtering for HostGroups that contain hosts + _existing_groups = cluster_util.fetch_cluster_groups(self._session, + self._cluster) + + hg_prefixes = tuple(prefix.removesuffix('-') + for prefix in CONF.external_customer_domain_name_prefixes) + existing_hostgroups = {g.name for g in _existing_groups.values() + if getattr(g, 'host', None) and g.name.startswith(hg_prefixes)} + existing_vm_groups = {g.name for g in _existing_groups.values() + if g.name.startswith(drs_prefix)} + + # fetch the existing rules and deduplicate them + _existing_rules = sorted(cluster_util.get_rules_by_prefix( + self._session, self._cluster, drs_prefix), + key=attrgetter('name')) + existing_rules = {} + for rule_name, rules in itertools.groupby(_existing_rules, + key=attrgetter('name')): + rules = list(rules) + existing_rules[rule_name] = rules[0] + if len(rules) == 1: + continue + # we found a duplicated rule and have to delete all but one + for rule in rules[1:]: + config_spec.rulesSpec.append( + cluster_util.create_rule_spec( + client_factory, rule, 'remove')) + + expected_vm_group_names = { + f"{drs_prefix}{hg_name}" for hg_name in existing_hostgroups} + if existing_hostgroups: + # If there are no hostgroups we have to take care of, it means our + # cluster doesn't handle external workload and we also don't need + # the internal_workload groups. + expected_vm_group_names.add(f"{drs_prefix}internal_workload") + + # Creat missing VmGroup objects so we can create rules for them + missing_vm_group_names = expected_vm_group_names - existing_vm_groups + for vm_group_name in missing_vm_group_names: + group = cluster_util.create_vm_group(client_factory, + vm_group_name, []) + config_spec.groupSpec.append( + cluster_util.create_group_spec( + client_factory, group, 'add')) + + expected_rule_names = set() + # NOTE: Experiments have shown that we can create VmGroup objects in + # the same call that we create a rule that uses them. + for hg_name in existing_hostgroups: + for affinity, vm_group_name in ( + ('affinity', f"{drs_prefix}{hg_name}"), + ('anti-affinity', f"{drs_prefix}internal_workload")): + rule_name = f"{drs_prefix}{hg_name}_{affinity}" + expected_rule_names.add(rule_name) + if rule := existing_rules.get(rule_name): + # Make sure our existing rule is enabled + if not rule.enabled: + rule.enabled = True + config_spec.rulesSpec.append( + cluster_util.create_rule_spec( + client_factory, rule, 'edit')) + else: + # Since we ensure that VmGroups we expect exist/get + # created, we don't have to handle non-existing VmGroups + # here. 
+                    rule = cluster_util.create_vm_host_rule(client_factory,
+                        rule_name, hg_name, vm_group_name, policy=affinity)
+                    config_spec.rulesSpec.append(
+                        cluster_util.create_rule_spec(
+                            client_factory, rule, 'add'))
+
+        # Clean up no longer necessary DRS rules
+        if do_cleanup:
+            for rule_name in existing_rules.keys() - expected_rule_names:
+                config_spec.rulesSpec.append(
+                    cluster_util.create_rule_spec(
+                        client_factory, existing_rules[rule_name], 'remove'))
+
+        if not config_spec.rulesSpec and not config_spec.groupSpec:
+            return
+
+        rule_changes = [str((rs.info.name, rs.operation))
+                        for rs in config_spec.rulesSpec]
+        group_changes = [str((gs.info.name, gs.operation))
+                         for gs in config_spec.groupSpec]
+        LOG.info('Updating external customer DRS rules and groups: %s and %s',
+                 ', '.join(rule_changes), ', '.join(group_changes))
+        cluster_util.reconfigure_cluster(self._session, self._cluster,
+                                         config_spec)
+        LOG.info('Updated external customer DRS rules and groups: %s and %s',
+                 ', '.join(rule_changes), ', '.join(group_changes))
+
     def _build_template_vm_inventory_path(self, vi):
         vm_folder_name = self._session._call_method(vutil,
                                                     "get_object_property",
@@ -4013,9 +4243,22 @@ def set_compute_host(self, compute_host):
         """Called by the driver on init_host() so we know the compute host"""
         self._compute_host = compute_host
 
-    def sync_server_group(self, context, sg_uuid):
-        """Sync a server group by its uuid for the current host/cluster
+    def _filter_instances_for_drs(self, context, instance_uuids, sg_uuid=None,
+                                  vm_group_name=None):
+        """Filter the given Instance UUIDs down to DRS-applicable ones
+
+        We cannot use all instances when configuring DRS rules/groups, because
+        some instances race against other, parallel tasks. This method
+        filters down the given instance UUIDs to the ones we should be able to
+        safely use with DRS.
+
+        We return a dictionary mapping the remaining instance UUIDs to MoRefs.
         """
+        if (sg_uuid, vm_group_name) == (None, None):
+            raise ValueError(
+                "Either `sg_uuid` or `vm_group_name` is required for "
+                "`VMwareVMOps._filter_instances_for_drs()`.")
+
         # we have to ignore instances currently in a volatitle state, where
         # either VMware cannot support them being in a DRS rule or we expect
         # them to go away during the syncing process, which could lead to
@@ -4028,6 +4271,99 @@ def sync_server_group(self, context, sg_uuid):
             task_states.REBUILDING, task_states.REBUILD_BLOCK_DEVICE_MAPPING,
         ]
+
+        # First we check for all instances, which have ongoing migrations
+        # on the given host, either as source or destination
+
+        # Decision matrix for migrations:
+        # Mig-Status        Action/Host
+        #                   Source      Dest
+        # Preparing         Remove      N/A
+        # Running           Remove      Add
+        # (Other states are consistent with default behaviour)
+        #
+        # So the Instance.host will always be the source of the migration,
+        # and we want to remove the rules.
+        # We only need to specially handle the case of a running migration
+        # on the destination host, and add it
+
+        MigrationList = objects.migration.MigrationList
+        filters = {
+            "host": self._compute_host,
+            "instance_uuid": instance_uuids,
+            "status": ["preparing", "running"],
+        }
+
+        expected_members = {}
+
+        migrations_by_instance_uuid = {}
+        for migration in MigrationList.get_by_filters(context, filters):
+            instance_uuid = migration.instance_uuid
+            if migration.source_compute == self._compute_host:
+                # The host is the source of a migration
+                # That means the instance will be part of the instance list
+                # So we have to remember that instance to be removed from
+                # the DRS rule-set
+                migrations_by_instance_uuid[instance_uuid] = \
+                    migration
+            else:
+                # We now handle the destination side
+                if migration.status == "preparing":
+                    # Not even started, we can ignore that one
+                    continue
+
+                # Polling the cache, as vm_util.get_vm_ref is very slow
+                # for the negative search.
+                # We just have to ensure, that the cache holds a value
+                # before syncing the server group on the destination host
+                # Race conditions are averted by the calling function's lock
+                moref = vm_util.vm_ref_cache_get(instance_uuid)
+                if moref:
+                    expected_members[instance_uuid] = moref
+
+        # retrieve the instances, because the given UUIDs contain all members
+        # and we need to filter them for our host
+        InstanceList = objects.instance.InstanceList
+        filters = {'host': self._compute_host, 'uuid': instance_uuids,
+                   'deleted': False}
+        instances = InstanceList.get_by_filters(context, filters,
+                                                expected_attrs=[])
+
+        if sg_uuid:
+            msg_prefix = "Excluding member %s of server-group %s, "
+            msg_value = sg_uuid
+        elif vm_group_name:
+            msg_prefix = "Excluding instance %s from VmGroup %s, "
+            msg_value = vm_group_name
+
+        for instance in instances:
+            task_state = instance.task_state
+            if task_state in STATES_EXCLUDING_MEMBERS_FROM_DRS_RULES:
+                LOG.debug(msg_prefix + "because it's in task_state %s.",
+                          instance.uuid, msg_value, task_state)
+                continue
+
+            migration = migrations_by_instance_uuid.get(instance.uuid)
+            if migration:
+                LOG.debug(msg_prefix + "due to being on the source side of "
+                          "ongoing migration %s.",
+                          instance.uuid, msg_value, migration.uuid)
+                continue
+
+            try:
+                moref = vm_util.get_vm_ref(self._session, instance)
+            except exception.InstanceNotFound:
+                LOG.warning(msg_prefix + "because we could not find "
+                            "a moref for it.",
+                            instance.uuid, msg_value)
+                continue
+            expected_members[instance.uuid] = moref
+
+        return expected_members
+
+    def sync_server_group(self, context, sg_uuid):
+        """Sync a server group by its uuid for the current host/cluster
+        """
         LOG.debug('Starting sync for server-group %s', sg_uuid)
 
         @utils.synchronized('vmware-server-group-{}'.format(sg_uuid))
@@ -4052,87 +4388,8 @@ def _sync_sync_server_group(context, sg_uuid):
                 LOG.debug('Sync for server-group %s done')
                 return
 
-            # First we check for all instances, which have ongoing migrations
-            # on the given host, either as source or destination
-
-            # Decision matrix for migrations:
-            # Mig-Status        Action/Host
-            #                   Source      Dest
-            # Preparing         Remove      N/A
-            # Running           Remove      Add
-            # (Other states are consistent with default behaviour)
-            #
-            # So the Instance.host will always be the source of the migration,
-            # and we want to remove the rules.
- # We only need to handle specially the case a running migration - # on the destination host, and add it - - MigrationList = objects.migration.MigrationList - filters = { - "host": self._compute_host, - "instance_uuid": sg.members, - "status": ["preparing", "running"], - } - - expected_members = {} - - migrations_by_instance_uuid = {} - for migration in MigrationList.get_by_filters(context, filters): - instance_uuid = migration.instance_uuid - if migration.source_compute == self._compute_host: - # The host is the source of a migration - # That means the instance will be part of the instance list - # So we have to remember that instance to be removed from - # the DRS rule-set - migrations_by_instance_uuid[instance_uuid] = \ - migration - else: - # We now handle the destination side - if migration.status == "preparing": - # Not even started, we can ignore that one - continue - - # Polling the cache, as vm_util.get_vm_ref is very slow - # for the negative search. - # We just have to ensure, that the cache holds a value - # before syncing the server group on the destination host - # Race conditions are averted by this functions lock - moref = vm_util.vm_ref_cache_get(instance_uuid) - if moref: - expected_members[instance_uuid] = moref - - # retrieve the instances, because sg.members contains all members - # and we need to filter them for our host - InstanceList = objects.instance.InstanceList - filters = {'host': self._compute_host, 'uuid': sg.members, - 'deleted': False} - instances = InstanceList.get_by_filters(context, filters, - expected_attrs=[]) - - for instance in instances: - task_state = instance.task_state - if task_state in STATES_EXCLUDING_MEMBERS_FROM_DRS_RULES: - LOG.debug("Excluding member %s of server-group %s, " - "because it's in task_state %s.", - instance.uuid, sg.uuid, task_state) - continue - - migration = migrations_by_instance_uuid.get(instance.uuid) - if migration: - LOG.debug("Excluding member %s of server-group %s, " - "due to being on the source side of " - "ongoing migration %s.", - instance.uuid, sg.uuid, migration.uuid) - continue - - try: - moref = vm_util.get_vm_ref(self._session, instance) - except exception.InstanceNotFound: - LOG.warning('Could not find moref for instance %s. ' - 'Ignoring member of server-group %s', - instance.uuid, sg.uuid) - continue - expected_members[instance.uuid] = moref + expected_members = self._filter_instances_for_drs( + context, sg.members, sg_uuid=sg_uuid) rule_members_by_name = {} if sg.policy == 'soft-anti-affinity':
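The three *_sync_loop_spacing options (the pre-existing custom-traits one and the two added here) gate their loops the same way: the loop condition is `>= 0`, so any negative value disables the sync-loop entirely, and otherwise the value is the sleep between runs. A minimal standalone sketch of that loop shape; `run_sync_loop`, `spacing` and `sync` are stand-ins, not names from the tree:

import time

def run_sync_loop(spacing, sync, log=print):
    # Mirrors the driver's loop shape: a negative spacing never enters the
    # loop, so the greenthread exits immediately and the sync is disabled.
    while spacing >= 0:
        log('Starting sync-loop run')
        try:
            sync()
        except Exception as e:  # keep the loop alive, like the driver does
            log('Finished sync-loop run with error: %s' % e)
        else:
            log('Finished sync-loop run')
        time.sleep(spacing)

# run_sync_loop(1800, lambda: None)   # sync every 30 minutes
# run_sync_loop(-1, lambda: None)     # disabled: returns immediately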
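How a HostGroup name ends up as a Placement trait can be illustrated without the driver. The sketch below mirrors only the wanted-traits half of _update_provider_tree_for_external_customers(); the trait templates come from nova/utils.py, while the HostGroup and prefix names ('extcust', 'extcust-team-a') are made up:

EXTERNAL_CUSTOMER_SUPPORTED_TRAIT = 'CUSTOM_EXTERNAL_CUSTOMER_SUPPORTED'
EXTERNAL_CUSTOMER_DOMAIN_TRAIT = 'CUSTOM_EXTERNAL_CUSTOMER_{}'

def wanted_traits(hostgroup_names, domain_name_prefixes):
    """Traits a node should expose for its external-customer HostGroups."""
    traits = set()
    for prefix in domain_name_prefixes:
        hg_prefix = prefix.removesuffix('-')
        for hg_name in hostgroup_names:
            if not hg_name.startswith(hg_prefix):
                continue
            # HostGroup name -> trait suffix: upper-case, '-' becomes '_'
            traits.add(EXTERNAL_CUSTOMER_DOMAIN_TRAIT.format(
                hg_name.upper().replace('-', '_')))
    if traits:
        # any matching HostGroup also makes the node "supported" in general
        traits.add(EXTERNAL_CUSTOMER_SUPPORTED_TRAIT)
    return traits

assert wanted_traits(['extcust', 'extcust-team-a'], ['extcust-']) == {
    'CUSTOM_EXTERNAL_CUSTOMER_SUPPORTED',
    'CUSTOM_EXTERNAL_CUSTOMER_EXTCUST',
    'CUSTOM_EXTERNAL_CUSTOMER_EXTCUST_TEAM_A',
}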
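The stats wiring in host.py boils down to two shapes: the cluster node gets a dict of HostGroup name to host MoRef values, and every host node gets the list of HostGroup names it belongs to. A reduced sketch with plain dicts and invented MoRef/host names ('host-11', 'esx-01') standing in for what update_status() builds; the real code also skips empty HostGroups and resolves MoRefs via oslo.vmware:

def attach_hostgroup_stats(data, cluster_node, hostgroups, per_host_stats):
    """hostgroups: HostGroup name -> list of host MoRef values (strings)."""
    data[cluster_node]['hostgroups'] = dict(hostgroups)
    for hg_name, host_refs in hostgroups.items():
        for ref in host_refs:
            if ref not in per_host_stats:
                continue
            h_name = per_host_stats[ref]['name']
            # every host node lists the HostGroups it is a member of
            data[h_name].setdefault('hostgroups', []).append(hg_name)
    return data

stats = {'cluster1': {}, 'esx-01': {}, 'esx-02': {}}
stats = attach_hostgroup_stats(
    stats, 'cluster1',
    {'extcust': ['host-11', 'host-12']},
    {'host-11': {'name': 'esx-01'}, 'host-12': {'name': 'esx-02'}})
assert stats['cluster1']['hostgroups'] == {'extcust': ['host-11', 'host-12']}
assert stats['esx-01']['hostgroups'] == ['extcust']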
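The VmGroup an instance lands in is decided purely from its domain_name, the configured prefixes and the HostGroups present on the cluster. A sketch of that decision order, equivalent to _get_external_customer_vm_group_for_instance() but taking the domain name directly; the domain and HostGroup names are hypothetical, the prefix value is the real constants.DRS_EXT_CUSTOMER_PREFIX:

DRS_EXT_CUSTOMER_PREFIX = 'NOVA_external_customer_'

def vm_group_for_domain(domain_name, hostgroups, prefixes):
    if domain_name in hostgroups:
        # dedicated HostGroup for exactly this domain
        return f"{DRS_EXT_CUSTOMER_PREFIX}{domain_name}"
    if not domain_name.startswith(tuple(prefixes)):
        # not an external customer at all -> internal workload group
        return f"{DRS_EXT_CUSTOMER_PREFIX}internal_workload"
    for prefix in prefixes:
        hg_name = prefix.removesuffix('-')
        if domain_name.startswith(prefix) and hg_name in hostgroups:
            # generic HostGroup shared by all domains with this prefix
            return f"{DRS_EXT_CUSTOMER_PREFIX}{hg_name}"
    return None  # external customer domain, but no matching HostGroup

hostgroups = ['extcust', 'extcust-special-domain']
prefixes = ['extcust-']
assert vm_group_for_domain('extcust-special-domain', hostgroups, prefixes) \
    == 'NOVA_external_customer_extcust-special-domain'
assert vm_group_for_domain('extcust-other-domain', hostgroups, prefixes) \
    == 'NOVA_external_customer_extcust'
assert vm_group_for_domain('internal-domain', hostgroups, prefixes) \
    == 'NOVA_external_customer_internal_workload'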
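For every matching HostGroup the sync converges on exactly two ClusterVmHostRuleInfo rules plus the shared internal-workload VmGroup. The naming scheme, reduced to plain strings; the 'extcust' HostGroup name is made up:

DRS_EXT_CUSTOMER_PREFIX = 'NOVA_external_customer_'

def expected_groups_and_rules(hostgroup_names):
    """VmGroup and rule names sync_external_customer_drs_rules() converges on."""
    vm_groups = {f"{DRS_EXT_CUSTOMER_PREFIX}{hg}" for hg in hostgroup_names}
    if hostgroup_names:
        vm_groups.add(f"{DRS_EXT_CUSTOMER_PREFIX}internal_workload")
    rules = {}
    for hg in hostgroup_names:
        # the customer's VmGroup must run on the HostGroup ...
        rules[f"{DRS_EXT_CUSTOMER_PREFIX}{hg}_affinity"] = (
            'affinity', f"{DRS_EXT_CUSTOMER_PREFIX}{hg}", hg)
        # ... and internal workload must stay off of it
        rules[f"{DRS_EXT_CUSTOMER_PREFIX}{hg}_anti-affinity"] = (
            'anti-affinity', f"{DRS_EXT_CUSTOMER_PREFIX}internal_workload", hg)
    return vm_groups, rules

groups, rules = expected_groups_and_rules(['extcust'])
assert sorted(groups) == ['NOVA_external_customer_extcust',
                          'NOVA_external_customer_internal_workload']
assert sorted(rules) == ['NOVA_external_customer_extcust_affinity',
                         'NOVA_external_customer_extcust_anti-affinity']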
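Duplicate rules with the same name are collapsed by sorting and grouping before reconciliation starts. The same sort/groupby step in isolation, with a namedtuple standing in for the suds rule objects (the real code then removes the extras via create_rule_spec(..., 'remove')):

from collections import namedtuple
from itertools import groupby
from operator import attrgetter

Rule = namedtuple('Rule', ['key', 'name'])  # stand-in for ClusterVmHostRuleInfo

def split_duplicates(fetched_rules):
    """Keep one rule per name, return the rest for removal."""
    keep, remove = {}, []
    for name, rules in groupby(sorted(fetched_rules, key=attrgetter('name')),
                               key=attrgetter('name')):
        rules = list(rules)
        keep[name] = rules[0]
        remove.extend(rules[1:])
    return keep, remove

keep, remove = split_duplicates([Rule(11, 'NOVA_external_customer_x_affinity'),
                                 Rule(12, 'NOVA_external_customer_x_affinity'),
                                 Rule(13, 'NOVA_external_customer_y_affinity')])
assert [r.key for r in remove] == [12]
assert sorted(keep) == ['NOVA_external_customer_x_affinity',
                        'NOVA_external_customer_y_affinity']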
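The migration decision matrix in _filter_instances_for_drs() reduces to a three-way outcome per ongoing migration touching this host. A tiny sketch of just that table; drs_member_action is an invented name:

def drs_member_action(is_source, status):
    """Outcome for an instance with an ongoing preparing/running migration."""
    if is_source:
        # Instance.host still points at the source, so the instance shows up
        # in the host's instance list - drop it from the DRS rule/group.
        return 'remove'
    if status == 'preparing':
        # destination side, migration not really started yet
        return 'ignore'
    # destination side of a running migration - add it, if a MoRef is cached
    return 'add'

assert drs_member_action(is_source=True, status='preparing') == 'remove'
assert drs_member_action(is_source=True, status='running') == 'remove'
assert drs_member_action(is_source=False, status='preparing') == 'ignore'
assert drs_member_action(is_source=False, status='running') == 'add'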