Skip to content

Commit

Permalink
Destroy services from DB on stop()
Browse files Browse the repository at this point in the history
When manila services are stopped or restarted via stop(), the DB
entries are not deleted, they are destroyed only in kill() method. In
cluster deployments, where multiple instances of manila-scheduler are
deployed via PODs, unique hostname is derived from node name. However
if pods are deployed again and launched on new hosts/nodes, the old
entries of manila-scheduler remain as they are. Fix by removing DB
entries after service stop() and creating new entries again in start().

Closes-bug: #1990839
  • Loading branch information
kpawar-sap committed Sep 29, 2022
1 parent 77e7820 commit 8365486
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 17 deletions.
3 changes: 2 additions & 1 deletion manila/data/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ def __init__(self, service_name=None, *args, **kwargs):
super(DataManager, self).__init__(*args, **kwargs)
self.busy_tasks_shares = {}

def init_host(self):
def init_host(self, service_id=None):
ctxt = context.get_admin_context()
self.service_id = service_id
shares = self.db.share_get_all(ctxt)
for share in shares:
if share['task_state'] in constants.BUSY_COPYING_STATES:
Expand Down
3 changes: 2 additions & 1 deletion manila/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,14 @@ def periodic_tasks(self, context, raise_on_error=False):
"""Tasks to be run at a periodic interval."""
return self.run_periodic_tasks(context, raise_on_error=raise_on_error)

def init_host(self):
def init_host(self, service_id=None):
    """Perform start-up initialization for a standalone service.

    A hook point invoked before the service is made available to other
    components (i.e. before it shows up on RPC and begins accepting RPC
    calls). The base implementation does nothing; child classes should
    override this method.

    :param service_id: ID of the service where the manager is running.
    """

Expand Down
20 changes: 10 additions & 10 deletions manila/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,7 @@ def start(self):
LOG.info('Starting %(topic)s node (version %(version_string)s)',
{'topic': self.topic, 'version_string': version_string})
self.model_disconnected = False
self.manager.init_host()
ctxt = context.get_admin_context()

if self.coordinator:
coordination.LOCK_COORDINATOR.start()

try:
service_ref = db.service_get_by_args(ctxt,
self.host,
Expand All @@ -148,6 +143,11 @@ def start(self):
except exception.NotFound:
self._create_service_ref(ctxt)

self.manager.init_host(service_id=self.service_id)

if self.coordinator:
coordination.LOCK_COORDINATOR.start()

LOG.debug("Creating RPC server for service %s.", self.topic)

target = messaging.Target(topic=self.topic, server=self.host)
Expand Down Expand Up @@ -227,12 +227,7 @@ def create(cls, host=None, binary=None, topic=None, manager=None,
return service_obj

def kill(self):
"""Destroy the service object in the datastore."""
self.stop()
try:
db.service_destroy(context.get_admin_context(), self.service_id)
except exception.NotFound:
LOG.warning('Service killed that has no database entry.')

def stop(self):
# Try to shut the connection down, but if we get any sort of
Expand All @@ -242,6 +237,11 @@ def stop(self):
except Exception:
pass

try:
db.service_destroy(context.get_admin_context(), self.service_id)
except exception.NotFound:
LOG.warning('Service killed that has no database entry.')

if self.coordinator:
try:
coordination.LOCK_COORDINATOR.stop()
Expand Down
3 changes: 2 additions & 1 deletion manila/share/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def _ensure_share_instance_has_pool(self, ctxt, share_instance):
return pool

@add_hooks
def init_host(self, reexport=False):
def init_host(self, service_id=None, reexport=False):
"""Initialization for a standalone service."""

# mark service alive by creating a probe
Expand All @@ -349,6 +349,7 @@ def init_host(self, reexport=False):
LOG.error("Probe not created: %(e)s", {'e': six.text_type(e)})

ctxt = context.get_admin_context()
self.service_id = service_id
driver_host_pair = "{}@{}".format(
self.driver.__class__.__name__,
self.host)
Expand Down
6 changes: 2 additions & 4 deletions manila/tests/test_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,21 +88,19 @@ def test_service_enabled_on_create_based_on_flag(self):
binary = 'manila-fake'
app = service.Service.create(host=host, binary=binary)
app.start()
app.stop()
ref = db.service_get(context.get_admin_context(), app.service_id)
db.service_destroy(context.get_admin_context(), app.service_id)
self.assertFalse(ref['disabled'])
app.stop()

def test_service_disabled_on_create_based_on_flag(self):
self.flags(enable_new_services=False)
host = 'foo'
binary = 'manila-fake'
app = service.Service.create(host=host, binary=binary)
app.start()
app.stop()
ref = db.service_get(context.get_admin_context(), app.service_id)
db.service_destroy(context.get_admin_context(), app.service_id)
self.assertTrue(ref['disabled'])
app.stop()


def fake_service_get_by_args(*args, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
fixes:
- |
In cluster deployments, where multiple instances of manila-scheduler are
deployed via PODs, unique hostname is derived from node name. However if
pods are deployed again and launched on new hosts/nodes, the old entries
of manila-scheduler remain as they are. Fixed by deleting DB entries
Launchpad bug `1990839 <https://bugs.launchpad.net/manila/+bug/1990839>`_

0 comments on commit 8365486

Please sign in to comment.