Commit 08aac76

Synchronization with 8.2 LINSTOR before a stable release

Last commit: 9207abe "fix(linstor): check if resource is tiebreaker (#62)"

Signed-off-by: Ronan Abhamon <[email protected]>
Wescoeur committed Sep 9, 2024
1 parent 006a372 commit 08aac76

Showing 5 changed files with 495 additions and 200 deletions.
95 changes: 37 additions & 58 deletions drivers/LinstorSR.py
@@ -362,9 +362,6 @@ def load(self, sr_uuid):
         self._linstor = None  # Ensure that LINSTOR attribute exists.
         self._journaler = None
 
-        self._is_master = False
-        if 'SRmaster' in self.dconf and self.dconf['SRmaster'] == 'true':
-            self._is_master = True
         self._group_name = self.dconf['group-name']
 
         self._vdi_shared_time = 0
@@ -437,7 +434,7 @@ def connect():
 
             return wrapped_method(self, *args, **kwargs)
 
-        if not self._is_master:
+        if not self.is_master():
             if self.cmd in [
                 'sr_create', 'sr_delete', 'sr_update', 'sr_probe',
                 'sr_scan', 'vdi_create', 'vdi_delete', 'vdi_resize',
@@ -472,7 +469,7 @@ def connect():
 
         # Ensure we use a non-locked volume when vhdutil is called.
         if (
-            self._is_master and self.cmd.startswith('vdi_') and
+            self.is_master() and self.cmd.startswith('vdi_') and
             self.cmd != 'vdi_create'
         ):
             self._linstor.ensure_volume_is_not_locked(
@@ -487,7 +484,7 @@ def connect():
         #
         # If the command is a SR command we want at least to remove
         # resourceless volumes.
-        if self._is_master and self.cmd not in [
+        if self.is_master() and self.cmd not in [
             'vdi_attach', 'vdi_detach',
             'vdi_activate', 'vdi_deactivate',
             'vdi_epoch_begin', 'vdi_epoch_end',
@@ -650,17 +647,17 @@ def delete(self, uuid):
                 opterr='Cannot get controller node name'
             )
 
-        host = None
+        host_ref = None
         if node_name == 'localhost':
-            host = util.get_this_host_ref(self.session)
+            host_ref = util.get_this_host_ref(self.session)
         else:
             for slave in util.get_all_slaves(self.session):
                 r_name = self.session.xenapi.host.get_record(slave)['hostname']
                 if r_name == node_name:
-                    host = slave
+                    host_ref = slave
                     break
 
-        if not host:
+        if not host_ref:
             raise xs_errors.XenError(
                 'LinstorSRDelete',
                 opterr='Failed to find host with hostname: {}'.format(
@@ -677,7 +674,7 @@ def delete(self, uuid):
                 'groupName': self._group_name,
             }
             self._exec_manager_command(
-                host, 'destroy', args, 'LinstorSRDelete'
+                host_ref, 'destroy', args, 'LinstorSRDelete'
             )
         except Exception as e:
             try:
@@ -766,22 +763,31 @@ def scan(self, uuid):
         # is started without a shared and mounted /var/lib/linstor path.
         try:
             self._linstor.get_database_path()
-        except Exception:
+        except Exception as e:
             # Failed to get database path, ensure we don't have
             # VDIs in the XAPI database...
             if self.session.xenapi.SR.get_VDIs(
                 self.session.xenapi.SR.get_by_uuid(self.uuid)
             ):
                 raise xs_errors.XenError(
                     'SRUnavailable',
-                    opterr='Database is not mounted'
+                    opterr='Database is not mounted or node name is invalid ({})'.format(e)
                 )
 
         # Update the database before the restart of the GC to avoid
         # bad sync in the process if new VDIs have been introduced.
         super(LinstorSR, self).scan(self.uuid)
         self._kick_gc()
 
+    def is_master(self):
+        if not hasattr(self, '_is_master'):
+            if 'SRmaster' not in self.dconf:
+                self._is_master = self.session is not None and util.is_master(self.session)
+            else:
+                self._is_master = self.dconf['SRmaster'] == 'true'
+
+        return self._is_master
+
     @_locked_load
     def vdi(self, uuid):
         return LinstorVDI(self, uuid)
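The new is_master() helper above computes the pool role lazily and caches it: an explicit SRmaster flag in dconf is trusted when present, otherwise XAPI is asked through util.is_master(). A minimal standalone sketch of the same compute-once pattern; RoleCache and query_pool_master are illustrative stand-ins, not driver names:

    # Sketch of the lazy, cached role check added in this commit.
    class RoleCache(object):
        def __init__(self, dconf, session=None):
            self.dconf = dconf
            self.session = session

        def is_master(self):
            # Compute on first call only; later calls reuse the cached value.
            if not hasattr(self, '_is_master'):
                if 'SRmaster' not in self.dconf:
                    self._is_master = (
                        self.session is not None and
                        query_pool_master(self.session)
                    )
                else:
                    self._is_master = self.dconf['SRmaster'] == 'true'
            return self._is_master

    def query_pool_master(session):
        # Placeholder for util.is_master(session), which asks XAPI
        # whether this host is the pool master.
        return True

    # Usage: the XAPI round trip happens at most once per SR object.
    cache = RoleCache({'SRmaster': 'true'})
    assert cache.is_master() is True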
@@ -967,7 +973,7 @@ def _synchronize_metadata_and_xapi(self):
             )
 
     def _synchronize_metadata(self):
-        if not self._is_master:
+        if not self.is_master():
             return
 
         util.SMlog('Synchronize metadata...')
@@ -1014,7 +1020,7 @@ def _load_vdis(self):
         if self._vdis_loaded:
             return
 
-        assert self._is_master
+        assert self.is_master()
 
         # We use a cache to avoid repeated JSON parsing.
         # The performance gain is not big but we can still
@@ -1492,7 +1498,7 @@ def _reconnect(self):
             controller_uri,
             self._group_name,
             repair=(
-                self._is_master and
+                self.is_master() and
                 self.srcmd.cmd in self.ops_exclusive
             ),
             logger=util.SMlog
@@ -1660,8 +1666,11 @@ def create(self, sr_uuid, vdi_uuid, size):
                 volume_name = REDO_LOG_VOLUME_NAME
 
             self._linstor.create_volume(
-                self.uuid, volume_size, persistent=False,
-                volume_name=volume_name
+                self.uuid,
+                volume_size,
+                persistent=False,
+                volume_name=volume_name,
+                high_availability=volume_name is not None
             )
             volume_info = self._linstor.get_volume_info(self.uuid)
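Per the referenced fix ("check if resource is tiebreaker", #62), only the named persistent volumes (HA statefile, redo log) request high_availability here; ordinary VDI volumes pass volume_name=None and keep the default placement. A hedged sketch of that selection rule, with assumed constant values:

    # Sketch of the keyword derivation above; the constant values are
    # assumptions mirroring the driver's persistent volume names.
    HA_VOLUME_NAME = 'xcp-persistent-ha-statefile'    # assumed value
    REDO_LOG_VOLUME_NAME = 'xcp-persistent-redo-log'  # assumed value

    def create_volume_kwargs(volume_name):
        # Only the named auxiliary volumes opt into LINSTOR's
        # high-availability handling (which involves tiebreaker
        # resources); anonymous VDI volumes keep the default placement.
        return {
            'persistent': False,
            'volume_name': volume_name,
            'high_availability': volume_name is not None,
        }

    print(create_volume_kwargs(REDO_LOG_VOLUME_NAME)['high_availability'])  # True
    print(create_volume_kwargs(None)['high_availability'])                  # False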

@@ -1792,6 +1801,7 @@ def attach(self, sr_uuid, vdi_uuid):
         writable = 'args' not in self.sr.srcmd.params or \
             self.sr.srcmd.params['args'][0] == 'true'
 
+        if not attach_from_config or self.sr.is_master():
             # We need to inflate the volume if we don't have enough place
             # to mount the VHD image. I.e. the volume capacity must be greater
             # than the VHD size + bitmap size.
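With the added guard, the inflation below runs only for a plain attach or on the master; attach_from_config on a slave skips it. The size rule stated in the comment, as a small illustrative sketch (names are not the driver's own):

    # Sketch of the inflate rule described in the comment above: the
    # volume must hold the VHD payload plus its block-allocation bitmap,
    # so anything smaller has to be inflated first.
    def needs_inflate(volume_capacity, vhd_size, bitmap_overhead):
        return volume_capacity < vhd_size + bitmap_overhead

    # Example: a 10 GiB VHD with 4 MiB of bitmap overhead does not fit
    # in a 10 GiB volume, so the driver would inflate it.
    GiB = 1024 ** 3
    assert needs_inflate(10 * GiB, 10 * GiB, 4 * 1024 ** 2)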
@@ -1825,7 +1835,7 @@ def attach(self, sr_uuid, vdi_uuid):
                 return self._attach_using_http_nbd()
 
         # Ensure we have a path...
-        self._create_chain_paths(self.uuid)
+        self.sr._vhdutil.create_chain_paths(self.uuid, readonly=not writable)
 
         self.attached = True
         return VDI.VDI.attach(self, self.sr.uuid, self.uuid)
@@ -1873,7 +1883,7 @@ def detach(self, sr_uuid, vdi_uuid):
             )
 
         # We remove only on slaves because the volume can be used by the GC.
-        if self.sr._is_master:
+        if self.sr.is_master():
             return
 
         while vdi_uuid:
@@ -1894,7 +1904,7 @@
 
     def resize(self, sr_uuid, vdi_uuid, size):
        util.SMlog('LinstorVDI.resize for {}'.format(self.uuid))
-        if not self.sr._is_master:
+        if not self.sr.is_master():
             raise xs_errors.XenError(
                 'VDISize',
                 opterr='resize on slave not allowed'
@@ -2153,7 +2163,7 @@ def update(self, sr_uuid, vdi_uuid):
     # --------------------------------------------------------------------------
 
     def _prepare_thin(self, attach):
-        if self.sr._is_master:
+        if self.sr.is_master():
             if attach:
                 attach_thin(
                     self.session, self.sr._journaler, self._linstor,
@@ -2352,7 +2362,7 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None):
             raise xs_errors.XenError('SnapshotChainTooLong')
 
         # Ensure we have a valid path if we don't have a local diskful.
-        self._create_chain_paths(self.uuid)
+        self.sr._vhdutil.create_chain_paths(self.uuid, readonly=True)
 
         volume_path = self.path
         if not util.pathexists(volume_path):
@@ -2499,10 +2509,10 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None):
                     active_uuid, clone_info, force_undo=True
                 )
                 self.sr._journaler.remove(LinstorJournaler.CLONE, active_uuid)
-            except Exception as e:
+            except Exception as clean_error:
                 util.SMlog(
                     'WARNING: Failed to clean up failed snapshot: {}'
-                    .format(e)
+                    .format(clean_error)
                 )
             raise xs_errors.XenError('VDIClone', opterr=str(e))
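The rename matters because the final raise still needs the outer e: in Python 3 an "except ... as name" target is unbound when its handler exits, so an inner handler reusing the name e would make str(e) fail with UnboundLocalError. A minimal demonstration of the pattern (illustrative code, not driver code):

    # Demonstration of the shadowing bug fixed above. In Python 3,
    # "except Exception as e" deletes 'e' when the handler exits, so an
    # inner handler reusing the name would leave the outer 'e' unbound.
    def snapshot_like():
        try:
            raise ValueError('original failure')
        except Exception as e:
            try:
                raise RuntimeError('cleanup also failed')
            except Exception as clean_error:  # distinct name: outer 'e' survives
                print('WARNING: cleanup failed: {}'.format(clean_error))
            # With "as e" here instead, the next line would raise
            # UnboundLocalError because the inner handler unbinds 'e'.
            raise Exception('VDIClone: {}'.format(e))

    try:
        snapshot_like()
    except Exception as final:
        print(final)  # VDIClone: original failure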

@@ -2739,7 +2749,7 @@ def _attach_using_http_nbd(self):
 
         # 0. Fetch drbd path.
         must_get_device_path = True
-        if not self.sr._is_master:
+        if not self.sr.is_master():
             # We are on a slave, we must try to find a diskful locally.
             try:
                 volume_info = self._linstor.get_volume_info(self.uuid)
@@ -2754,7 +2764,7 @@
             must_get_device_path = hostname in volume_info.diskful
 
         drbd_path = None
-        if must_get_device_path or self.sr._is_master:
+        if must_get_device_path or self.sr.is_master():
             # If we are master, we must ensure we have a diskless
             # or diskful available to init HA.
             # It also avoids this error in xensource.log
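The logic above decides whether a local DRBD device path must be fetched: the master always needs one (to initialize HA), while a slave needs one only if it holds a diskful replica. Condensed as an illustrative helper (names are not the driver's own):

    def should_get_device_path(is_master, hostname, diskful_hosts):
        # Masters always need a local DRBD path to init HA; slaves only
        # when they hold a diskful replica of the volume.
        return is_master or hostname in diskful_hosts

    assert should_get_device_path(True, 'host-2', ['host-1'])
    assert not should_get_device_path(False, 'host-2', ['host-1'])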
@@ -2812,37 +2822,6 @@ def _detach_using_http_nbd(self):
         self._kill_persistent_nbd_server(volume_name)
         self._kill_persistent_http_server(volume_name)
 
-    def _create_chain_paths(self, vdi_uuid):
-        # OPTIMIZE: Add a limit_to_first_allocated_block param to limit vhdutil calls.
-        # Useful for the snapshot code algorithm.
-
-        while vdi_uuid:
-            path = self._linstor.get_device_path(vdi_uuid)
-            if not util.pathexists(path):
-                raise xs_errors.XenError(
-                    'VDIUnavailable', opterr='Could not find: {}'.format(path)
-                )
-
-            # Diskless path can be created on the fly, ensure we can open it.
-            def check_volume_usable():
-                while True:
-                    try:
-                        with open(path, 'r+'):
-                            pass
-                    except IOError as e:
-                        if e.errno == errno.ENODATA:
-                            time.sleep(2)
-                            continue
-                        if e.errno == errno.EROFS:
-                            util.SMlog('Volume not attachable because RO. Openers: {}'.format(
-                                self.sr._linstor.get_volume_openers(vdi_uuid)
-                            ))
-                        raise
-                    break
-            util.retry(check_volume_usable, 15, 2)
-
-            vdi_uuid = self.sr._vhdutil.get_vhd_info(vdi_uuid).parentUuid
-
# ------------------------------------------------------------------------------
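The removed _create_chain_paths walked the VHD chain and polled each device until it became openable (a DRBD diskless path can exist before it is usable); that behavior now lives behind the shared create_chain_paths helper called with a readonly flag earlier in this diff. The polling idiom in isolation, as a hedged sketch with illustrative names and limits:

    import errno
    import time

    def wait_until_openable(path, attempts=15, delay=2):
        # Retry opening a device until DRBD stops returning ENODATA,
        # then give up after a fixed number of attempts.
        for _ in range(attempts):
            try:
                with open(path, 'r+'):
                    return
            except IOError as e:
                if e.errno == errno.ENODATA:
                    time.sleep(delay)
                    continue
                raise  # EROFS and friends are real errors, not transient
        raise IOError(errno.ENODATA, 'device never became usable', path)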

