diff --git a/Makefile b/Makefile
index 03d0d533a..d01652d0c 100755
--- a/Makefile
+++ b/Makefile
@@ -84,6 +84,7 @@ PLUGIN_SCRIPT_DEST := /etc/xapi.d/plugins/
LIBEXEC := /opt/xensource/libexec/
UDEV_RULES_DIR := /etc/udev/rules.d/
UDEV_SCRIPTS_DIR := /etc/udev/scripts/
+SYSTEMD_CONF_DIR := /etc/systemd/system/
SYSTEMD_SERVICE_DIR := /usr/lib/systemd/system/
INIT_DIR := /etc/rc.d/init.d/
MPATH_CONF_DIR := /etc/multipath.xenserver/
@@ -138,6 +139,8 @@ install: precheck
mkdir -p $(SM_STAGING)$(UDEV_RULES_DIR)
mkdir -p $(SM_STAGING)$(UDEV_SCRIPTS_DIR)
mkdir -p $(SM_STAGING)$(INIT_DIR)
+ mkdir -p $(SM_STAGING)$(SYSTEMD_CONF_DIR)
+ mkdir -p $(SM_STAGING)$(SYSTEMD_CONF_DIR)/linstor-satellite.service.d
mkdir -p $(SM_STAGING)$(SYSTEMD_SERVICE_DIR)
mkdir -p $(SM_STAGING)$(MPATH_CONF_DIR)
mkdir -p $(SM_STAGING)$(MODPROBE_DIR)
@@ -163,6 +166,10 @@ install: precheck
$(SM_STAGING)/$(SM_DEST)
install -m 644 etc/logrotate.d/$(SMLOG_CONF) \
$(SM_STAGING)/$(LOGROTATE_DIR)
+ install -m 644 etc/systemd/system/linstor-satellite.service.d/override.conf \
+ $(SM_STAGING)/$(SYSTEMD_CONF_DIR)/linstor-satellite.service.d/
+ install -m 644 etc/systemd/system/var-lib-linstor.service \
+ $(SM_STAGING)/$(SYSTEMD_CONF_DIR)
install -m 644 etc/make-dummy-sr.service \
$(SM_STAGING)/$(SYSTEMD_SERVICE_DIR)
install -m 644 systemd/xs-sm.service \
@@ -206,6 +213,9 @@ install: precheck
install -m 755 drivers/iscsilib.py $(SM_STAGING)$(SM_DEST)
install -m 755 drivers/fcoelib.py $(SM_STAGING)$(SM_DEST)
mkdir -p $(SM_STAGING)$(LIBEXEC)
+ install -m 755 scripts/fork-log-daemon $(SM_STAGING)$(LIBEXEC)
+ install -m 755 scripts/linstor-kv-tool $(SM_STAGING)$(BIN_DEST)
+ install -m 755 scripts/safe-umount $(SM_STAGING)$(LIBEXEC)
install -m 755 scripts/local-device-change $(SM_STAGING)$(LIBEXEC)
install -m 755 scripts/check-device-sharing $(SM_STAGING)$(LIBEXEC)
install -m 755 scripts/usb_change $(SM_STAGING)$(LIBEXEC)
diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py
index a72d43e09..ca5015e31 100755
--- a/drivers/LinstorSR.py
+++ b/drivers/LinstorSR.py
@@ -19,8 +19,11 @@
try:
from linstorjournaler import LinstorJournaler
from linstorvhdutil import LinstorVhdUtil
- from linstorvolumemanager \
- import LinstorVolumeManager, LinstorVolumeManagerError
+ from linstorvolumemanager import get_controller_uri
+ from linstorvolumemanager import get_controller_node_name
+ from linstorvolumemanager import LinstorVolumeManager
+ from linstorvolumemanager import LinstorVolumeManagerError
+
LINSTOR_AVAILABLE = True
except ImportError:
LINSTOR_AVAILABLE = False
@@ -28,16 +31,24 @@
from lock import Lock
import blktap2
import cleanup
+import distutils.util
import errno
import functools
+import lvutil
+import os
+import re
import scsiutil
+import signal
+import socket
import SR
import SRCommand
+import subprocess
import time
import traceback
import util
import VDI
import vhdutil
+import xml.etree.ElementTree as xml_parser
import xmlrpc.client
import xs_errors
@@ -48,6 +59,23 @@
HIDDEN_TAG = 'hidden'
+XHA_CONFIG_PATH = '/etc/xensource/xhad.conf'
+
+FORK_LOG_DAEMON = '/opt/xensource/libexec/fork-log-daemon'
+
+# This flag can be disabled to debug the DRBD layer.
+# When this config var is False, the HA can only be used under
+# specific conditions:
+# - Only one heartbeat diskless VDI is present in the pool.
+# - The other heartbeat volumes must be diskful and limited to a maximum of 3.
+USE_HTTP_NBD_SERVERS = True
+
+# Useful flag to trace calls using cProfile.
+TRACE_PERFS = False
+
+# Enable/Disable VHD key hash support.
+USE_KEY_HASH = False
+
# ==============================================================================
# TODO: Supports 'VDI_INTRODUCE', 'VDI_RESET_ON_BOOT/2', 'SR_TRIM',
@@ -72,9 +100,9 @@
CONFIGURATION = [
['group-name', 'LVM group name'],
- ['hosts', 'host names to use'],
['redundancy', 'replication count'],
- ['provisioning', '"thin" or "thick" are accepted']
+ ['provisioning', '"thin" or "thick" are accepted (optional, defaults to thin)'],
+ ['monitor-db-quorum', 'disable controller when only one host is online (optional, defaults to true)']
]
DRIVER_INFO = {
@@ -92,7 +120,8 @@
OPS_EXCLUSIVE = [
'sr_create', 'sr_delete', 'sr_attach', 'sr_detach', 'sr_scan',
- 'sr_update', 'vdi_create', 'vdi_delete', 'vdi_clone', 'vdi_snapshot'
+ 'sr_update', 'sr_probe', 'vdi_init', 'vdi_create', 'vdi_delete',
+ 'vdi_attach', 'vdi_detach', 'vdi_clone', 'vdi_snapshot',
]
# ==============================================================================
@@ -136,7 +165,9 @@ def attach_thin(session, journaler, linstor, sr_uuid, vdi_uuid):
# If the virtual VHD size is lower than the LINSTOR volume size,
# there is nothing to do.
vhd_size = compute_volume_size(
- LinstorVhdUtil(session, linstor).get_size_virt(vdi_uuid),
+ # TODO: Replace pylint comment with this feature when possible:
+ # https://github.com/PyCQA/pylint/pull/2926
+ LinstorVhdUtil(session, linstor).get_size_virt(vdi_uuid), # pylint: disable = E1120
image_type
)
@@ -180,12 +211,16 @@ def detach_thin(session, linstor, sr_uuid, vdi_uuid):
device_path = linstor.get_device_path(vdi_uuid)
new_volume_size = LinstorVolumeManager.round_up_volume_size(
- LinstorVhdUtil(session, linstor).get_size_phys(device_path)
+ # TODO: Replace pylint comment with this feature when possible:
+ # https://github.com/PyCQA/pylint/pull/2926
+ LinstorVhdUtil(session, linstor).get_size_phys(vdi_uuid) # pylint: disable = E1120
)
volume_info = linstor.get_volume_info(vdi_uuid)
old_volume_size = volume_info.virtual_size
- deflate(vdi_uuid, device_path, new_volume_size, old_volume_size)
+ deflate(
+ linstor, vdi_uuid, device_path, new_volume_size, old_volume_size
+ )
finally:
lock.release()
@@ -197,7 +232,7 @@ def inflate(journaler, linstor, vdi_uuid, vdi_path, new_size, old_size):
return
util.SMlog(
- 'Inflate {} (new VHD size={}, previous={})'
+ 'Inflate {} (size={}, previous={})'
.format(vdi_uuid, new_size, old_size)
)
@@ -206,8 +241,15 @@ def inflate(journaler, linstor, vdi_uuid, vdi_path, new_size, old_size):
)
linstor.resize_volume(vdi_uuid, new_size)
+ result_size = linstor.get_volume_size(vdi_uuid)
+ if result_size < new_size:
+ util.SMlog(
+ 'WARNING: Cannot inflate volume to {}B, result size: {}B'
+ .format(new_size, result_size)
+ )
+
if not util.zeroOut(
- vdi_path, new_size - vhdutil.VHD_FOOTER_SIZE,
+ vdi_path, result_size - vhdutil.VHD_FOOTER_SIZE,
vhdutil.VHD_FOOTER_SIZE
):
raise xs_errors.XenError(
@@ -215,11 +257,11 @@ def inflate(journaler, linstor, vdi_uuid, vdi_path, new_size, old_size):
opterr='Failed to zero out VHD footer {}'.format(vdi_path)
)
- vhdutil.setSizePhys(vdi_path, new_size, False)
+ LinstorVhdUtil(None, linstor).set_size_phys(vdi_path, result_size, False)
journaler.remove(LinstorJournaler.INFLATE, vdi_uuid)
-def deflate(vdi_uuid, vdi_path, new_size, old_size):
+def deflate(linstor, vdi_uuid, vdi_path, new_size, old_size):
new_size = LinstorVolumeManager.round_up_volume_size(new_size)
if new_size >= old_size:
return
@@ -229,16 +271,86 @@ def deflate(vdi_uuid, vdi_path, new_size, old_size):
.format(vdi_uuid, new_size, old_size)
)
- vhdutil.setSizePhys(vdi_path, new_size)
+ LinstorVhdUtil(None, linstor).set_size_phys(vdi_path, new_size)
# TODO: Change the LINSTOR volume size using linstor.resize_volume.
+IPS_XHA_CACHE = None
+
+
+def get_ips_from_xha_config_file():
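+    # Returns a tuple: (IP address of the local host or None,
+    # {HostID: IPaddress} map built from the xhad.conf <host> entries).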
+ if IPS_XHA_CACHE:
+ return IPS_XHA_CACHE
+
+ ips = dict()
+ host_id = None
+ try:
+ # Ensure there is no dirty read problem.
+ # For example if the HA is reloaded.
+ tree = util.retry(
+ lambda: xml_parser.parse(XHA_CONFIG_PATH),
+ maxretry=10,
+ period=1
+ )
+ except:
+ return (None, ips)
+
+ def parse_host_nodes(ips, node):
+ current_id = None
+ current_ip = None
+
+ for sub_node in node:
+ if sub_node.tag == 'IPaddress':
+ current_ip = sub_node.text
+ elif sub_node.tag == 'HostID':
+ current_id = sub_node.text
+ else:
+ continue
+
+ if current_id and current_ip:
+ ips[current_id] = current_ip
+ return
+        util.SMlog('Ill-formed XHA file, missing IPaddress and/or HostID')
+
+ def parse_common_config(ips, node):
+ for sub_node in node:
+ if sub_node.tag == 'host':
+ parse_host_nodes(ips, sub_node)
+
+ def parse_local_config(ips, node):
+ for sub_node in node:
+ if sub_node.tag == 'localhost':
+ for host_node in sub_node:
+ if host_node.tag == 'HostID':
+ return host_node.text
+
+ for node in tree.getroot():
+ if node.tag == 'common-config':
+ parse_common_config(ips, node)
+ elif node.tag == 'local-config':
+ host_id = parse_local_config(ips, node)
+ else:
+ continue
+
+ if ips and host_id:
+ break
+
+ return (host_id and ips.get(host_id), ips)
+
+
+def activate_lvm_group(group_name):
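+    # group_name is either 'VG' or 'VG/LV': only the VG part is activated.
+    # A failure here is only logged; it is not fatal.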
+ path = group_name.split('/')
+ assert path and len(path) <= 2
+ try:
+ lvutil.setActiveVG(path[0], True)
+ except Exception as e:
+        util.SMlog('Cannot activate VG `{}`: {}'.format(path[0], e))
+
# ==============================================================================
# Usage example:
# xe sr-create type=linstor name-label=linstor-sr
# host-uuid=d2deba7a-c5ad-4de1-9a20-5c8df3343e93
-# device-config:hosts=node-linstor1,node-linstor2,node-linstor3
# device-config:group-name=vg_loop device-config:redundancy=2
@@ -250,6 +362,11 @@ class LinstorSR(SR.SR):
MANAGER_PLUGIN = 'linstor-manager'
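+    # Initialization states used by the _locked_load decorator.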
+ INIT_STATUS_NOT_SET = 0
+ INIT_STATUS_IN_PROGRESS = 1
+ INIT_STATUS_OK = 2
+ INIT_STATUS_FAIL = 3
+
# --------------------------------------------------------------------------
# SR methods.
# --------------------------------------------------------------------------
@@ -265,8 +382,6 @@ def load(self, sr_uuid):
)
# Check parameters.
- if 'hosts' not in self.dconf or not self.dconf['hosts']:
- raise xs_errors.XenError('LinstorConfigHostsMissing')
if 'group-name' not in self.dconf or not self.dconf['group-name']:
raise xs_errors.XenError('LinstorConfigGroupNameMissing')
if 'redundancy' not in self.dconf or not self.dconf['redundancy']:
@@ -289,6 +404,10 @@ def load(self, sr_uuid):
else:
self._provisioning = self.PROVISIONING_DEFAULT
+ monitor_db_quorum = self.dconf.get('monitor-db-quorum')
+ self._monitor_db_quorum = (monitor_db_quorum is None) or \
+ distutils.util.strtobool(monitor_db_quorum)
+
# Note: We don't have access to the session field if the
# 'vdi_attach_from_config' command is executed.
self._has_session = self.sr_ref and self.session is not None
@@ -307,8 +426,8 @@ def load(self, sr_uuid):
self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid)
self.sr_vditype = SR.DEFAULT_TAP
- self._hosts = self.dconf['hosts'].split(',')
- self._redundancy = int(self.dconf['redundancy'] or 1)
+ if self.cmd == 'sr_create':
+ self._redundancy = int(self.dconf['redundancy']) or 1
self._linstor = None # Ensure that LINSTOR attribute exists.
self._journaler = None
@@ -317,46 +436,72 @@ def load(self, sr_uuid):
self._is_master = True
self._group_name = self.dconf['group-name']
- self._master_uri = None
- self._vdi_shared_locked = False
+ self._vdi_shared_time = 0
- self._initialized = False
+ self._init_status = self.INIT_STATUS_NOT_SET
+
+ self._vdis_loaded = False
+ self._all_volume_info_cache = None
+ self._all_volume_metadata_cache = None
def _locked_load(method):
- @functools.wraps(method)
- def wrap(self, *args, **kwargs):
- if self._initialized:
- return method(self, *args, **kwargs)
- self._initialized = True
+ def wrapped_method(self, *args, **kwargs):
+ self._init_status = self.INIT_STATUS_OK
+ return method(self, *args, **kwargs)
- if not self._has_session:
- if self.srcmd.cmd == 'vdi_attach_from_config':
- # We must have a valid LINSTOR instance here without using
- # the XAPI.
- self._master_uri = 'linstor://{}'.format(
- util.get_master_address()
- )
- self._journaler = LinstorJournaler(
- self._master_uri, self._group_name, logger=util.SMlog
- )
+ def load(self, *args, **kwargs):
+ # Activate all LVMs to make drbd-reactor happy.
+ if self.srcmd.cmd == 'sr_attach':
+ activate_lvm_group(self._group_name)
- try:
+ if not self._has_session:
+ if self.srcmd.cmd in (
+ 'vdi_attach_from_config',
+ 'vdi_detach_from_config',
+ # When on-slave (is_open) is executed we have an
+ # empty command.
+ None
+ ):
+ def create_linstor(uri, attempt_count=30):
self._linstor = LinstorVolumeManager(
- self._master_uri,
+ uri,
self._group_name,
- logger=util.SMlog
- )
- return
- except Exception as e:
- util.SMlog(
- 'Ignore exception. Failed to build LINSTOR '
- 'instance without session: {}'.format(e)
+ logger=util.SMlog,
+ attempt_count=attempt_count
)
- return
- self._master_uri = 'linstor://{}'.format(
- util.get_master_rec(self.session)['address']
- )
+ controller_uri = get_controller_uri()
+ if controller_uri:
+ create_linstor(controller_uri)
+ else:
+ def connect():
+ # We must have a valid LINSTOR instance here without using
+                            # the XAPI. Fall back to the HA config file.
+ for ip in get_ips_from_xha_config_file()[1].values():
+ controller_uri = 'linstor://' + ip
+ try:
+ util.SMlog('Connecting from config to LINSTOR controller using: {}'.format(ip))
+ create_linstor(controller_uri, attempt_count=0)
+ return controller_uri
+ except:
+ pass
+
+ controller_uri = util.retry(connect, maxretry=30, period=1)
+ if not controller_uri:
+ raise xs_errors.XenError(
+ 'SRUnavailable',
+ opterr='No valid controller URI to attach/detach from config'
+ )
+
+ self._journaler = LinstorJournaler(
+ controller_uri, self._group_name, logger=util.SMlog
+ )
+
+ if self.srcmd.cmd is None:
+ # Only useful on on-slave plugin (is_open).
+ self._vhdutil = LinstorVhdUtil(self.session, self._linstor)
+
+ return wrapped_method(self, *args, **kwargs)
if not self._is_master:
if self.cmd in [
@@ -374,37 +519,12 @@ def wrap(self, *args, **kwargs):
# behaviors if the GC is executed during an action on a slave.
if self.cmd.startswith('vdi_'):
self._shared_lock_vdi(self.srcmd.params['vdi_uuid'])
- self._vdi_shared_locked = True
+ self._vdi_shared_time = time.time()
- self._journaler = LinstorJournaler(
- self._master_uri, self._group_name, logger=util.SMlog
- )
-
- # Ensure ports are opened and LINSTOR controller/satellite
- # are activated.
- if self.srcmd.cmd == 'sr_create':
- # TODO: Disable if necessary
- self._enable_linstor_on_all_hosts(status=True)
-
- try:
- # Try to open SR if exists.
- self._linstor = LinstorVolumeManager(
- self._master_uri,
- self._group_name,
- repair=self._is_master,
- logger=util.SMlog
- )
- self._vhdutil = LinstorVhdUtil(self.session, self._linstor)
- except Exception as e:
- if self.srcmd.cmd == 'sr_create' or \
- self.srcmd.cmd == 'sr_detach':
- # Ignore exception in this specific case: sr_create.
- # At this moment the LinstorVolumeManager cannot be
- # instantiated. Concerning the sr_detach command, we must
- # ignore LINSTOR exceptions (if the volume group doesn't
- # exist for example after a bad user action).
- pass
- else:
+ if self.srcmd.cmd != 'sr_create' and self.srcmd.cmd != 'sr_detach':
+ try:
+ self._reconnect()
+ except Exception as e:
raise xs_errors.XenError('SRUnavailable', opterr=str(e))
if self._linstor:
@@ -416,41 +536,87 @@ def wrap(self, *args, **kwargs):
if hosts:
util.SMlog('Failed to join node(s): {}'.format(hosts))
+ # Ensure we use a non-locked volume when vhdutil is called.
+ if (
+ self._is_master and self.cmd.startswith('vdi_') and
+ self.cmd != 'vdi_create'
+ ):
+ self._linstor.ensure_volume_is_not_locked(
+ self.srcmd.params['vdi_uuid']
+ )
+
try:
- # If the command is a SR command on the master, we must
- # load all VDIs and clean journal transactions.
- # We must load the VDIs in the snapshot case too.
+                # If the command is an SR scan command on the master,
+                # we must load all VDIs and clean journal transactions.
+                # We must also load the VDIs in the snapshot case, but only
+                # if there is at least one entry in the journal.
+                #
+                # If the command is an SR command, we at least want to remove
+                # resourceless volumes.
if self._is_master and self.cmd not in [
'vdi_attach', 'vdi_detach',
'vdi_activate', 'vdi_deactivate',
'vdi_epoch_begin', 'vdi_epoch_end',
'vdi_update', 'vdi_destroy'
]:
- self._load_vdis()
- self._undo_all_journal_transactions()
+ load_vdis = (
+ self.cmd == 'sr_scan' or
+ self.cmd == 'sr_attach'
+ ) or len(
+ self._journaler.get_all(LinstorJournaler.INFLATE)
+ ) or len(
+ self._journaler.get_all(LinstorJournaler.CLONE)
+ )
+
+ if load_vdis:
+ self._load_vdis()
+
self._linstor.remove_resourceless_volumes()
self._synchronize_metadata()
except Exception as e:
+ if self.cmd == 'sr_scan' or self.cmd == 'sr_attach':
+ # Always raise, we don't want to remove VDIs
+ # from the XAPI database otherwise.
+ raise e
util.SMlog(
'Ignoring exception in LinstorSR.load: {}'.format(e)
)
util.SMlog(traceback.format_exc())
- return method(self, *args, **kwargs)
+ return wrapped_method(self, *args, **kwargs)
+
+ @functools.wraps(wrapped_method)
+ def wrap(self, *args, **kwargs):
+ if self._init_status in \
+ (self.INIT_STATUS_OK, self.INIT_STATUS_IN_PROGRESS):
+ return wrapped_method(self, *args, **kwargs)
+ if self._init_status == self.INIT_STATUS_FAIL:
+ util.SMlog(
+ 'Can\'t call method {} because initialization failed'
+ .format(method)
+ )
+ else:
+ try:
+ self._init_status = self.INIT_STATUS_IN_PROGRESS
+ return load(self, *args, **kwargs)
+ except Exception:
+ if self._init_status != self.INIT_STATUS_OK:
+ self._init_status = self.INIT_STATUS_FAIL
+ raise
return wrap
- @_locked_load
def cleanup(self):
- if self._vdi_shared_locked:
+ if self._vdi_shared_time:
self._shared_lock_vdi(self.srcmd.params['vdi_uuid'], locked=False)
@_locked_load
def create(self, uuid, size):
util.SMlog('LinstorSR.create for {}'.format(self.uuid))
- if self._redundancy > len(self._hosts):
+        host_addresses = util.get_host_addresses(self.session)
+        if self._redundancy > len(host_addresses):
raise xs_errors.XenError(
'LinstorSRCreate',
opterr='Redundancy greater than host count'
@@ -472,15 +638,39 @@ def create(self, uuid, size):
opterr='group name must be unique'
)
+ if srs:
+ raise xs_errors.XenError(
+ 'LinstorSRCreate',
+ opterr='LINSTOR SR must be unique in a pool'
+ )
+
+ online_hosts = util.get_online_hosts(self.session)
+        if len(online_hosts) < len(host_addresses):
+ raise xs_errors.XenError(
+ 'LinstorSRCreate',
+ opterr='Not enough online hosts'
+ )
+
+ ips = {}
+ for host_ref in online_hosts:
+ record = self.session.xenapi.host.get_record(host_ref)
+ hostname = record['hostname']
+ ips[hostname] = record['address']
+
+ # Ensure ports are opened and LINSTOR satellites
+        # are activated. At the same time, the drbd-reactor instances
+ # must be stopped.
+ self._prepare_sr_on_all_hosts(self._group_name, enabled=True)
+
# Create SR.
# Throw if the SR already exists.
try:
self._linstor = LinstorVolumeManager.create_sr(
- self._master_uri,
self._group_name,
- self._hosts,
+ ips,
self._redundancy,
thin_provisioning=self._provisioning == 'thin',
+ auto_quorum=self._monitor_db_quorum,
logger=util.SMlog
)
self._vhdutil = LinstorVhdUtil(self.session, self._linstor)
@@ -488,30 +678,83 @@ def create(self, uuid, size):
util.SMlog('Failed to create LINSTOR SR: {}'.format(e))
raise xs_errors.XenError('LinstorSRCreate', opterr=str(e))
+ try:
+ util.SMlog(
+                "Finishing SR creation, enabling drbd-reactor on all hosts..."
+ )
+ self._update_drbd_reactor_on_all_hosts(enabled=True)
+ except Exception as e:
+ try:
+ self._linstor.destroy()
+ except Exception as e2:
+ util.SMlog(
+ 'Failed to destroy LINSTOR SR after creation fail: {}'
+ .format(e2)
+ )
+ raise e
+
@_locked_load
def delete(self, uuid):
util.SMlog('LinstorSR.delete for {}'.format(self.uuid))
cleanup.gc_force(self.session, self.uuid)
- if self.vdis:
+ if self.vdis or self._linstor._volumes:
raise xs_errors.XenError('SRNotEmpty')
- try:
- # TODO: Use specific exceptions. If the LINSTOR group doesn't
- # exist, we can remove it without problem.
+ node_name = get_controller_node_name()
+ if not node_name:
+ raise xs_errors.XenError(
+ 'LinstorSRDelete',
+ opterr='Cannot get controller node name'
+ )
- # TODO: Maybe remove all volumes unused by the SMAPI.
- # We must ensure it's a safe idea...
+ host = None
+ if node_name == 'localhost':
+ host = util.get_this_host_ref(self.session)
+ else:
+ for slave in util.get_all_slaves(self.session):
+ r_name = self.session.xenapi.host.get_record(slave)['hostname']
+ if r_name == node_name:
+ host = slave
+ break
+
+ if not host:
+ raise xs_errors.XenError(
+ 'LinstorSRDelete',
+ opterr='Failed to find host with hostname: {}'.format(
+ node_name
+ )
+ )
- self._linstor.destroy()
- Lock.cleanupAll(self.uuid)
+ try:
+ self._update_drbd_reactor_on_all_hosts(
+ controller_node_name=node_name, enabled=False
+ )
+
+ args = {
+ 'groupName': self._group_name,
+ }
+ self._exec_manager_command(
+ host, 'destroy', args, 'LinstorSRDelete'
+ )
except Exception as e:
+ try:
+ self._update_drbd_reactor_on_all_hosts(
+ controller_node_name=node_name, enabled=True
+ )
+ except Exception as e2:
+ util.SMlog(
+ 'Failed to restart drbd-reactor after destroy fail: {}'
+ .format(e2)
+ )
util.SMlog('Failed to delete LINSTOR SR: {}'.format(e))
raise xs_errors.XenError(
'LinstorSRDelete',
opterr=str(e)
)
+ Lock.cleanupAll(self.uuid)
+
@_locked_load
def update(self, uuid):
util.SMlog('LinstorSR.update for {}'.format(self.uuid))
@@ -558,6 +801,9 @@ def probe(self):
@_locked_load
def scan(self, uuid):
+ if self._init_status == self.INIT_STATUS_FAIL:
+ return
+
util.SMlog('LinstorSR.scan for {}'.format(self.uuid))
if not self._linstor:
raise xs_errors.XenError(
@@ -565,6 +811,9 @@ def scan(self, uuid):
opterr='no such volume group: {}'.format(self._group_name)
)
+ # Note: `scan` can be called outside this module, so ensure the VDIs
+ # are loaded.
+ self._load_vdis()
self._update_physical_size()
for vdi_uuid in self.vdis.keys():
@@ -588,10 +837,9 @@ def vdi(self, uuid):
# --------------------------------------------------------------------------
def _shared_lock_vdi(self, vdi_uuid, locked=True):
- pools = self.session.xenapi.pool.get_all()
- master = self.session.xenapi.pool.get_master(pools[0])
+ master = util.get_master_ref(self.session)
- method = 'lockVdi'
+ command = 'lockVdi'
args = {
'groupName': self._group_name,
'srUuid': self.uuid,
@@ -599,48 +847,128 @@ def _shared_lock_vdi(self, vdi_uuid, locked=True):
'locked': str(locked)
}
- ret = self.session.xenapi.host.call_plugin(
- master, self.MANAGER_PLUGIN, method, args
- )
- util.SMlog(
- 'call-plugin ({} with {}) returned: {}'
- .format(method, args, ret)
- )
- if ret == 'False':
- raise xs_errors.XenError(
- 'VDIUnavailable',
- opterr='Plugin {} failed'.format(self.MANAGER_PLUGIN)
- )
+        # Note: We must avoid unlocking the volume if the timeout is reached
+ # because during volume unlock, the SR lock is not used. Otherwise
+ # we could destroy a valid lock acquired from another host...
+ #
+        # This code is not very clean; the ideal solution would be to acquire
+        # the SR lock during volume unlock (as is done for locking), but it's
+        # not easy to implement without impacting performance.
+ if not locked:
+ elapsed_time = time.time() - self._vdi_shared_time
+ timeout = LinstorVolumeManager.LOCKED_EXPIRATION_DELAY * 0.7
+ if elapsed_time >= timeout:
+ util.SMlog(
+ 'Avoid unlock call of {} because timeout has been reached'
+ .format(vdi_uuid)
+ )
+ return
+
+ self._exec_manager_command(master, command, args, 'VDIUnavailable')
# --------------------------------------------------------------------------
# Network.
# --------------------------------------------------------------------------
- def _enable_linstor(self, host, status):
- method = 'enable'
- args = {'enabled': str(bool(status))}
+ def _exec_manager_command(self, host_ref, command, args, error):
+ host_rec = self.session.xenapi.host.get_record(host_ref)
+ host_uuid = host_rec['uuid']
+
+ try:
+ ret = self.session.xenapi.host.call_plugin(
+ host_ref, self.MANAGER_PLUGIN, command, args
+ )
+ except Exception as e:
+ util.SMlog(
+ 'call-plugin on {} ({}:{} with {}) raised'.format(
+ host_uuid, self.MANAGER_PLUGIN, command, args
+ )
+ )
+ raise e
- ret = self.session.xenapi.host.call_plugin(
- host, self.MANAGER_PLUGIN, method, args
- )
util.SMlog(
- 'call-plugin ({} with {}) returned: {}'.format(method, args, ret)
+ 'call-plugin on {} ({}:{} with {}) returned: {}'.format(
+ host_uuid, self.MANAGER_PLUGIN, command, args, ret
+ )
)
if ret == 'False':
raise xs_errors.XenError(
- 'SRUnavailable',
+ error,
opterr='Plugin {} failed'.format(self.MANAGER_PLUGIN)
)
- def _enable_linstor_on_master(self, status):
- pools = self.session.xenapi.pool.get_all()
- master = self.session.xenapi.pool.get_master(pools[0])
- self._enable_linstor(master, status)
+ def _prepare_sr(self, host, group_name, enabled):
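+        # prepareSr/releaseSr are linstor-manager plugin commands used to prepare
+        # a host (open ports, start the LINSTOR satellite) or release it.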
+ self._exec_manager_command(
+ host,
+ 'prepareSr' if enabled else 'releaseSr',
+ {'groupName': group_name},
+ 'SRUnavailable'
+ )
+
+ def _prepare_sr_on_all_hosts(self, group_name, enabled):
+ master = util.get_master_ref(self.session)
+ self._prepare_sr(master, group_name, enabled)
- def _enable_linstor_on_all_hosts(self, status):
- self._enable_linstor_on_master(status)
for slave in util.get_all_slaves(self.session):
- self._enable_linstor(slave, status)
+ self._prepare_sr(slave, group_name, enabled)
+
+ def _update_drbd_reactor(self, host, enabled):
+ self._exec_manager_command(
+ host,
+ 'updateDrbdReactor',
+ {'enabled': str(enabled)},
+ 'SRUnavailable'
+ )
+
+ def _update_drbd_reactor_on_all_hosts(
+ self, enabled, controller_node_name=None
+ ):
+ if controller_node_name == 'localhost':
+ controller_node_name = self.session.xenapi.host.get_record(
+ util.get_this_host_ref(self.session)
+ )['hostname']
+ assert controller_node_name
+ assert controller_node_name != 'localhost'
+
+ controller_host = None
+ secondary_hosts = []
+
+ hosts = self.session.xenapi.host.get_all_records()
+        for host_ref, host_rec in hosts.items():
+ hostname = host_rec['hostname']
+ if controller_node_name == hostname:
+ controller_host = host_ref
+ else:
+ secondary_hosts.append((host_ref, hostname))
+
+ action_name = 'Starting' if enabled else 'Stopping'
+ if controller_node_name and not controller_host:
+ util.SMlog('Failed to find controller host: `{}`'.format(
+ controller_node_name
+ ))
+
+ if enabled and controller_host:
+ util.SMlog('{} drbd-reactor on controller host `{}`...'.format(
+ action_name, controller_node_name
+ ))
+ # If enabled is true, we try to start the controller on the desired
+ # node name first.
+ self._update_drbd_reactor(controller_host, enabled)
+
+ for host_ref, hostname in secondary_hosts:
+ util.SMlog('{} drbd-reactor on host {}...'.format(
+ action_name, hostname
+ ))
+ self._update_drbd_reactor(host_ref, enabled)
+
+ if not enabled and controller_host:
+ util.SMlog('{} drbd-reactor on controller host `{}`...'.format(
+ action_name, controller_node_name
+ ))
+ # If enabled is false, we disable the drbd-reactor service of
+ # the controller host last. Why? Otherwise the linstor-controller
+ # of other nodes can be started, and we don't want that.
+ self._update_drbd_reactor(controller_host, enabled)
# --------------------------------------------------------------------------
# Metadata.
@@ -653,7 +981,7 @@ def _synchronize_metadata_and_xapi(self):
# Now update the VDI information in the metadata if required.
xenapi = self.session.xenapi
- volumes_metadata = self._linstor.volumes_with_metadata
+ volumes_metadata = self._linstor.get_volumes_with_metadata()
for vdi_uuid, volume_metadata in volumes_metadata.items():
try:
vdi_ref = xenapi.VDI.get_by_uuid(vdi_uuid)
@@ -708,36 +1036,43 @@ def _update_stats(self, virt_alloc_delta):
# Update size attributes of the SR parent class.
self.virtual_allocation = valloc + virt_alloc_delta
- # Physical size contains the total physical size.
- # i.e. the sum of the sizes of all devices on all hosts, not the AVG.
self._update_physical_size()
# Notify SR parent class.
self._db_update()
def _update_physical_size(self):
- # Physical size contains the total physical size.
- # i.e. the sum of the sizes of all devices on all hosts, not the AVG.
- self.physical_size = self._linstor.physical_size
+        # We use the size of the smallest disk; this is an approximation that
+ # ensures the displayed physical size is reachable by the user.
+ (min_physical_size, pool_count) = self._linstor.get_min_physical_size()
+ self.physical_size = min_physical_size * pool_count / \
+ self._linstor.redundancy
- # `self._linstor.physical_free_size` contains the total physical free
- # memory. If Thin provisioning is used we can't use it, we must use
- # LINSTOR volume size to gives a good idea of the required
- # usable memory to the users.
- self.physical_utilisation = self._linstor.total_allocated_volume_size
-
- # If Thick provisioning is used, we can use this line instead:
- # self.physical_utilisation = \
- # self.physical_size - self._linstor.physical_free_size
+ self.physical_utilisation = self._linstor.allocated_volume_size
# --------------------------------------------------------------------------
# VDIs.
# --------------------------------------------------------------------------
def _load_vdis(self):
- if self.vdis:
+ if self._vdis_loaded:
return
+ assert self._is_master
+
+ # We use a cache to avoid repeated JSON parsing.
+        # The performance gain is not huge, but it only takes a few lines.
+ self._create_linstor_cache()
+ self._load_vdis_ex()
+ self._destroy_linstor_cache()
+
+ # We must mark VDIs as loaded only if the load is a success.
+ self._vdis_loaded = True
+
+ self._undo_all_journal_transactions()
+
+ def _load_vdis_ex(self):
# 1. Get existing VDIs in XAPI.
xenapi = self.session.xenapi
xapi_vdi_uuids = set()
@@ -745,8 +1080,8 @@ def _load_vdis(self):
xapi_vdi_uuids.add(xenapi.VDI.get_uuid(vdi))
# 2. Get volumes info.
- all_volume_info = self._linstor.volumes_with_info
- volumes_metadata = self._linstor.volumes_with_metadata
+ all_volume_info = self._all_volume_info_cache
+ volumes_metadata = self._all_volume_metadata_cache
# 3. Get CBT vdis.
# See: https://support.citrix.com/article/CTX230619
@@ -758,7 +1093,8 @@ def _load_vdis(self):
introduce = False
- if self.cmd == 'sr_scan':
+ # Try to introduce VDIs only during scan/attach.
+ if self.cmd == 'sr_scan' or self.cmd == 'sr_attach':
has_clone_entries = list(self._journaler.get_all(
LinstorJournaler.CLONE
).items())
@@ -782,6 +1118,9 @@ def _load_vdis(self):
if not introduce:
continue
+ if vdi_uuid.startswith('DELETED_'):
+ continue
+
volume_metadata = volumes_metadata.get(vdi_uuid)
if not volume_metadata:
util.SMlog(
@@ -836,10 +1175,10 @@ def _load_vdis(self):
util.SMlog(
'Introducing VDI {} '.format(vdi_uuid) +
- ' (name={}, virtual_size={}, physical_size={})'.format(
+ ' (name={}, virtual_size={}, allocated_size={})'.format(
name_label,
volume_info.virtual_size,
- volume_info.physical_size
+ volume_info.allocated_size
)
)
@@ -857,7 +1196,7 @@ def _load_vdis(self):
sm_config,
managed,
str(volume_info.virtual_size),
- str(volume_info.physical_size)
+ str(volume_info.allocated_size)
)
is_a_snapshot = volume_metadata.get(IS_A_SNAPSHOT_TAG)
@@ -881,9 +1220,11 @@ def _load_vdis(self):
vdi = self.vdi(vdi_uuid)
self.vdis[vdi_uuid] = vdi
- if vdi.vdi_type == vhdutil.VDI_TYPE_VHD:
+ if USE_KEY_HASH and vdi.vdi_type == vhdutil.VDI_TYPE_VHD:
+ # TODO: Replace pylint comment with this feature when possible:
+ # https://github.com/PyCQA/pylint/pull/2926
vdi.sm_config_override['key_hash'] = \
- self._vhdutil.get_key_hash(vdi_uuid)
+ self._vhdutil.get_key_hash(vdi_uuid) # pylint: disable = E1120
# 4.c. Update CBT status of disks either just added
# or already in XAPI.
@@ -940,7 +1281,7 @@ def _load_vdis(self):
else:
geneology[vdi.parent] = [vdi_uuid]
if not vdi.hidden:
- self.virtual_allocation += vdi.utilisation
+ self.virtual_allocation += vdi.size
# 9. Remove all hidden leaf nodes to avoid introducing records that
# will be GC'ed.
@@ -1014,13 +1355,18 @@ def _handle_interrupted_inflate(self, vdi_uuid, old_size):
util.SMlog('Cannot deflate missing VDI {}'.format(vdi_uuid))
return
- current_size = self._linstor.get_volume_info(self.uuid).virtual_size
+ assert not self._all_volume_info_cache
+ volume_info = self._linstor.get_volume_info(vdi_uuid)
+
+ current_size = volume_info.virtual_size
+ assert current_size > 0
+
util.zeroOut(
vdi.path,
current_size - vhdutil.VHD_FOOTER_SIZE,
vhdutil.VHD_FOOTER_SIZE
)
- deflate(vdi_uuid, vdi.path, old_size, current_size)
+ deflate(self._linstor, vdi_uuid, vdi.path, old_size, current_size)
def _handle_interrupted_clone(
self, vdi_uuid, clone_info, force_undo=False
@@ -1033,7 +1379,7 @@ def _handle_interrupted_clone(
base_uuid, snap_uuid = clone_info.split('_')
# Use LINSTOR data because new VDIs may not be in the XAPI.
- volume_names = self._linstor.volumes_with_name
+ volume_names = self._linstor.get_volumes_with_name()
# Check if we don't have a base VDI. (If clone failed at startup.)
if base_uuid not in volume_names:
@@ -1089,7 +1435,7 @@ def _undo_clone(self, volume_names, vdi_uuid, base_uuid, snap_uuid):
if base_type == vhdutil.VDI_TYPE_VHD:
vhd_info = self._vhdutil.get_vhd_info(base_uuid, False)
if vhd_info.hidden:
- vhdutil.setHidden(base_path, False)
+ self._vhdutil.set_hidden(base_path, False)
elif base_type == vhdutil.VDI_TYPE_RAW and \
base_metadata.get(HIDDEN_TAG):
self._linstor.update_volume_metadata(
@@ -1099,10 +1445,6 @@ def _undo_clone(self, volume_names, vdi_uuid, base_uuid, snap_uuid):
# Remove the child nodes.
if snap_uuid and snap_uuid in volume_names:
util.SMlog('Destroying snap {}...'.format(snap_uuid))
- snap_metadata = self._linstor.get_volume_metadata(snap_uuid)
-
- if snap_metadata.get(VDI_TYPE_TAG) != vhdutil.VDI_TYPE_VHD:
- raise util.SMException('Clone {} not VHD'.format(snap_uuid))
try:
self._linstor.destroy_volume(snap_uuid)
@@ -1150,10 +1492,64 @@ def _undo_clone(self, volume_names, vdi_uuid, base_uuid, snap_uuid):
util.SMlog('*** INTERRUPTED CLONE OP: rollback success')
+ # --------------------------------------------------------------------------
+ # Cache.
+ # --------------------------------------------------------------------------
+
+ def _create_linstor_cache(self):
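+        # Fetch all volume info and metadata in one pass and keep them in memory
+        # for _load_vdis; retry (and reconnect to the controller) if the info
+        # request fails.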
+ # TODO: use a nonlocal with python3.
+ class context:
+ reconnect = False
+
+ def create_cache():
+ try:
+ if context.reconnect:
+ self._reconnect()
+ return self._linstor.get_volumes_with_info()
+ except Exception as e:
+ context.reconnect = True
+ raise e
+
+ self._all_volume_metadata_cache = \
+ self._linstor.get_volumes_with_metadata()
+ self._all_volume_info_cache = util.retry(
+ create_cache,
+ maxretry=10,
+ period=3
+ )
+
+ def _destroy_linstor_cache(self):
+ self._all_volume_info_cache = None
+ self._all_volume_metadata_cache = None
+
# --------------------------------------------------------------------------
# Misc.
# --------------------------------------------------------------------------
+ def _reconnect(self):
+ controller_uri = get_controller_uri()
+
+ self._journaler = LinstorJournaler(
+ controller_uri, self._group_name, logger=util.SMlog
+ )
+
+ # Try to open SR if exists.
+ # We can repair only if we are on the master AND if
+ # we are trying to execute an exclusive operation.
+ # Otherwise we could try to delete a VDI being created or
+ # during a snapshot. An exclusive op is the guarantee that
+ # the SR is locked.
+ self._linstor = LinstorVolumeManager(
+ controller_uri,
+ self._group_name,
+ repair=(
+ self._is_master and
+ self.srcmd.cmd in self.ops_exclusive
+ ),
+ logger=util.SMlog
+ )
+ self._vhdutil = LinstorVhdUtil(self.session, self._linstor)
+
def _ensure_space_available(self, amount_needed):
space_available = self._linstor.max_volume_size_allowed
if (space_available < amount_needed):
@@ -1233,7 +1629,7 @@ def raise_bad_load(e):
if (
self.sr.srcmd.cmd == 'vdi_attach_from_config' or
self.sr.srcmd.cmd == 'vdi_detach_from_config'
- ) and self.sr.srcmd.params['vdi_uuid'] == self.uuid:
+ ):
self.vdi_type = vhdutil.VDI_TYPE_RAW
self.path = self.sr.srcmd.params['vdi_path']
else:
@@ -1297,11 +1693,11 @@ def create(self, sr_uuid, vdi_uuid, size):
# 2. Compute size and check space available.
size = vhdutil.validate_and_round_vhd_size(int(size))
- util.SMlog('LinstorVDI.create: type={}, size={}'.format(
- self.vdi_type, size
- ))
-
volume_size = compute_volume_size(size, self.vdi_type)
+ util.SMlog(
+ 'LinstorVDI.create: type={}, vhd-size={}, volume-size={}'
+ .format(self.vdi_type, size, volume_size)
+ )
self.sr._ensure_space_available(volume_size)
# 3. Set sm_config attribute of VDI parent class.
@@ -1310,8 +1706,15 @@ def create(self, sr_uuid, vdi_uuid, size):
# 4. Create!
failed = False
try:
+ volume_name = None
+ if self.ty == 'ha_statefile':
+ volume_name = 'xcp-persistent-ha-statefile'
+ elif self.ty == 'redo_log':
+ volume_name = 'xcp-persistent-redo-log'
+
self._linstor.create_volume(
- self.uuid, volume_size, persistent=False
+ self.uuid, volume_size, persistent=False,
+ volume_name=volume_name
)
volume_info = self._linstor.get_volume_info(self.uuid)
@@ -1320,16 +1723,16 @@ def create(self, sr_uuid, vdi_uuid, size):
if self.vdi_type == vhdutil.VDI_TYPE_RAW:
self.size = volume_info.virtual_size
else:
- vhdutil.create(
+ self.sr._vhdutil.create(
self.path, size, False, self.MAX_METADATA_VIRT_SIZE
)
self.size = self.sr._vhdutil.get_size_virt(self.uuid)
if self._key_hash:
- vhdutil.setKey(self.path, self._key_hash)
+ self.sr._vhdutil.set_key(self.path, self._key_hash)
# Because vhdutil commands modify the volume data,
- # we must retrieve a new time the utilisation size.
+            # we must retrieve the utilization size again.
volume_info = self._linstor.get_volume_info(self.uuid)
volume_metadata = {
@@ -1344,6 +1747,13 @@ def create(self, sr_uuid, vdi_uuid, size):
METADATA_OF_POOL_TAG: ''
}
self._linstor.set_volume_metadata(self.uuid, volume_metadata)
+
+ # Set the open timeout to 1min to reduce CPU usage
+ # in http-disk-server when a secondary server tries to open
+ # an already opened volume.
+ if self.ty == 'ha_statefile' or self.ty == 'redo_log':
+ self._linstor.set_auto_promote_timeout(self.uuid, 600)
+
self._linstor.mark_volume_as_persistent(self.uuid)
except util.CommandException as e:
failed = True
@@ -1364,11 +1774,11 @@ def create(self, sr_uuid, vdi_uuid, size):
'{}'.format(e)
)
- self.utilisation = volume_info.physical_size
+ self.utilisation = volume_info.allocated_size
self.sm_config['vdi_type'] = self.vdi_type
self.ref = self._db_introduce()
- self.sr._update_stats(volume_info.virtual_size)
+ self.sr._update_stats(self.size)
return VDI.VDI.get_params(self)
@@ -1407,14 +1817,15 @@ def delete(self, sr_uuid, vdi_uuid, data_only=False):
del self.sr.vdis[self.uuid]
# TODO: Check size after delete.
- self.sr._update_stats(-self.capacity)
+ self.sr._update_stats(-self.size)
self.sr._kick_gc()
return super(LinstorVDI, self).delete(sr_uuid, vdi_uuid, data_only)
def attach(self, sr_uuid, vdi_uuid):
util.SMlog('LinstorVDI.attach for {}'.format(self.uuid))
+ attach_from_config = self.sr.srcmd.cmd == 'vdi_attach_from_config'
if (
- self.sr.srcmd.cmd != 'vdi_attach_from_config' or
+ not attach_from_config or
self.sr.srcmd.params['vdi_uuid'] != self.uuid
) and self.sr._journaler.has_entries(self.uuid):
raise xs_errors.XenError(
@@ -1423,50 +1834,62 @@ def attach(self, sr_uuid, vdi_uuid):
'scan SR first to trigger auto-repair'
)
- writable = 'args' not in self.sr.srcmd.params or \
- self.sr.srcmd.params['args'][0] == 'true'
+ if not attach_from_config or self.sr._is_master:
+ writable = 'args' not in self.sr.srcmd.params or \
+ self.sr.srcmd.params['args'][0] == 'true'
- # We need to inflate the volume if we don't have enough place
- # to mount the VHD image. I.e. the volume capacity must be greater
- # than the VHD size + bitmap size.
- need_inflate = True
- if self.vdi_type == vhdutil.VDI_TYPE_RAW or not writable or \
- self.capacity >= compute_volume_size(self.size, self.vdi_type):
- need_inflate = False
-
- if need_inflate:
- try:
- self._prepare_thin(True)
- except Exception as e:
- raise xs_errors.XenError(
- 'VDIUnavailable',
- opterr='Failed to attach VDI during "prepare thin": {}'
- .format(e)
- )
+            # We need to inflate the volume if we don't have enough space
+ # to mount the VHD image. I.e. the volume capacity must be greater
+ # than the VHD size + bitmap size.
+ need_inflate = True
+ if (
+ self.vdi_type == vhdutil.VDI_TYPE_RAW or
+ not writable or
+ self.capacity >= compute_volume_size(self.size, self.vdi_type)
+ ):
+ need_inflate = False
- if not util.pathexists(self.path):
- raise xs_errors.XenError(
- 'VDIUnavailable', opterr='Could not find: {}'.format(self.path)
- )
+ if need_inflate:
+ try:
+ self._prepare_thin(True)
+ except Exception as e:
+ raise xs_errors.XenError(
+ 'VDIUnavailable',
+ opterr='Failed to attach VDI during "prepare thin": {}'
+ .format(e)
+ )
if not hasattr(self, 'xenstore_data'):
self.xenstore_data = {}
-
- # TODO: Is it useful?
- self.xenstore_data.update(scsiutil.update_XS_SCSIdata(
- self.uuid, scsiutil.gen_synthetic_page_data(self.uuid)
- ))
-
self.xenstore_data['storage-type'] = LinstorSR.DRIVER_TYPE
- self.attached = True
+ if (
+ USE_HTTP_NBD_SERVERS and
+ attach_from_config and
+ self.path.startswith('/dev/http-nbd/')
+ ):
+ return self._attach_using_http_nbd()
+
+ # Ensure we have a path...
+ while vdi_uuid:
+ path = self._linstor.get_device_path(vdi_uuid)
+ if not util.pathexists(path):
+ raise xs_errors.XenError(
+ 'VDIUnavailable', opterr='Could not find: {}'.format(path)
+ )
+ vdi_uuid = self.sr._vhdutil.get_vhd_info(vdi_uuid).parentUuid
+ self.attached = True
return VDI.VDI.attach(self, self.sr.uuid, self.uuid)
def detach(self, sr_uuid, vdi_uuid):
util.SMlog('LinstorVDI.detach for {}'.format(self.uuid))
+ detach_from_config = self.sr.srcmd.cmd == 'vdi_detach_from_config'
self.attached = False
+ if detach_from_config and self.path.startswith('/dev/http-nbd/'):
+ return self._detach_using_http_nbd()
+
if self.vdi_type == vhdutil.VDI_TYPE_RAW:
return
@@ -1503,9 +1926,23 @@ def detach(self, sr_uuid, vdi_uuid):
def resize(self, sr_uuid, vdi_uuid, size):
util.SMlog('LinstorVDI.resize for {}'.format(self.uuid))
+ if not self.sr._is_master:
+ raise xs_errors.XenError(
+ 'VDISize',
+ opterr='resize on slave not allowed'
+ )
+
if self.hidden:
raise xs_errors.XenError('VDIUnavailable', opterr='hidden VDI')
+ # Compute the virtual VHD and DRBD volume size.
+ size = vhdutil.validate_and_round_vhd_size(int(size))
+ volume_size = compute_volume_size(size, self.vdi_type)
+ util.SMlog(
+ 'LinstorVDI.resize: type={}, vhd-size={}, volume-size={}'
+ .format(self.vdi_type, size, volume_size)
+ )
+
if size < self.size:
util.SMlog(
'vdi_resize: shrinking not supported: '
@@ -1513,18 +1950,13 @@ def resize(self, sr_uuid, vdi_uuid, size):
)
raise xs_errors.XenError('VDISize', opterr='shrinking not allowed')
- # Compute the virtual VHD size.
- size = vhdutil.validate_and_round_vhd_size(int(size))
-
if size == self.size:
return VDI.VDI.get_params(self)
- # Compute the LINSTOR volume size.
- new_volume_size = compute_volume_size(size, self.vdi_type)
if self.vdi_type == vhdutil.VDI_TYPE_RAW:
old_volume_size = self.size
else:
- old_volume_size = self.capacity
+ old_volume_size = self.utilisation
if self.sr._provisioning == 'thin':
# VDI is currently deflated, so keep it deflated.
new_volume_size = old_volume_size
@@ -1533,7 +1965,7 @@ def resize(self, sr_uuid, vdi_uuid, size):
space_needed = new_volume_size - old_volume_size
self.sr._ensure_space_available(space_needed)
- old_capacity = self.capacity
+ old_size = self.size
if self.vdi_type == vhdutil.VDI_TYPE_RAW:
self._linstor.resize(self.uuid, new_volume_size)
else:
@@ -1542,7 +1974,7 @@ def resize(self, sr_uuid, vdi_uuid, size):
self.sr._journaler, self._linstor, self.uuid, self.path,
new_volume_size, old_volume_size
)
- vhdutil.setSizeVirtFast(self.path, size)
+ self.sr._vhdutil.set_size_virt_fast(self.path, size)
# Reload size attributes.
self._load_this()
@@ -1552,7 +1984,7 @@ def resize(self, sr_uuid, vdi_uuid, size):
self.session.xenapi.VDI.set_physical_utilisation(
vdi_ref, str(self.utilisation)
)
- self.sr._update_stats(self.capacity - old_capacity)
+ self.sr._update_stats(self.size - old_size)
return VDI.VDI.get_params(self)
def clone(self, sr_uuid, vdi_uuid):
@@ -1574,8 +2006,8 @@ def compose(self, sr_uuid, vdi1, vdi2):
if not blktap2.VDI.tap_pause(self.session, self.sr.uuid, self.uuid):
raise util.SMException('Failed to pause VDI {}'.format(self.uuid))
try:
- vhdutil.setParent(self.path, parent_path, False)
- vhdutil.setHidden(parent_path)
+ self.sr._vhdutil.set_parent(self.path, parent_path, False)
+ self.sr._vhdutil.set_hidden(parent_path)
self.sr.session.xenapi.VDI.set_managed(
self.sr.srcmd.params['args'][0], False
)
@@ -1598,25 +2030,40 @@ def generate_config(self, sr_uuid, vdi_uuid):
util.SMlog('LinstorVDI.generate_config for {}'.format(self.uuid))
- if not self.path or not util.pathexists(self.path):
- available = False
- # Try to refresh symlink path...
- try:
- self.path = self._linstor.get_device_path(vdi_uuid)
- available = util.pathexists(self.path)
- except Exception:
- pass
- if not available:
- raise xs_errors.XenError('VDIUnavailable')
-
resp = {}
resp['device_config'] = self.sr.dconf
resp['sr_uuid'] = sr_uuid
resp['vdi_uuid'] = self.uuid
resp['sr_sm_config'] = self.sr.sm_config
- resp['vdi_path'] = self.path
resp['command'] = 'vdi_attach_from_config'
+ # By default, we generate a normal config.
+        # But if the disk is persistent, we must use an HTTP/NBD
+ # server to ensure we can always write or read data.
+ # Why? DRBD is unsafe when used with more than 4 hosts:
+        # we are limited to 1 diskless and 3 diskful nodes.
+        # We can't lift this limitation, so we use an NBD/HTTP device
+ # instead.
+ volume_name = self._linstor.get_volume_name(self.uuid)
+ if not USE_HTTP_NBD_SERVERS or volume_name not in [
+ 'xcp-persistent-ha-statefile', 'xcp-persistent-redo-log'
+ ]:
+ if not self.path or not util.pathexists(self.path):
+ available = False
+ # Try to refresh symlink path...
+ try:
+ self.path = self._linstor.get_device_path(vdi_uuid)
+ available = util.pathexists(self.path)
+ except Exception:
+ pass
+ if not available:
+ raise xs_errors.XenError('VDIUnavailable')
+
+ resp['vdi_path'] = self.path
+ else:
+ # Axiom: DRBD device is present on at least one host.
+ resp['vdi_path'] = '/dev/http-nbd/' + volume_name
+
config = xmlrpc.client.dumps(tuple([resp]), 'vdi_attach_from_config')
return xmlrpc.client.dumps((config,), "", True)
@@ -1652,19 +2099,28 @@ def reset_leaf(self, sr_uuid, vdi_uuid):
.format(self.uuid)
)
- vhdutil.killData(self.path)
+ self.sr._vhdutil.kill_data(self.path)
def _load_this(self):
- volume_metadata = self._linstor.get_volume_metadata(self.uuid)
- volume_info = self._linstor.get_volume_info(self.uuid)
+ volume_metadata = None
+ if self.sr._all_volume_metadata_cache:
+ volume_metadata = self.sr._all_volume_metadata_cache.get(self.uuid)
+ if volume_metadata is None:
+ volume_metadata = self._linstor.get_volume_metadata(self.uuid)
+
+ volume_info = None
+ if self.sr._all_volume_info_cache:
+ volume_info = self.sr._all_volume_info_cache.get(self.uuid)
+ if volume_info is None:
+ volume_info = self._linstor.get_volume_info(self.uuid)
- # Contains the physical size used on all disks.
+ # Contains the max physical size used on a disk.
# When LINSTOR LVM driver is used, the size should be similar to
# virtual size (i.e. the LINSTOR max volume size).
# When LINSTOR Thin LVM driver is used, the used physical size should
# be lower than virtual size at creation.
# The physical size increases after each write in a new block.
- self.utilisation = volume_info.physical_size
+ self.utilisation = volume_info.allocated_size
self.capacity = volume_info.virtual_size
if self.vdi_type == vhdutil.VDI_TYPE_RAW:
@@ -1691,7 +2147,7 @@ def _mark_hidden(self, hidden=True):
return
if self.vdi_type == vhdutil.VDI_TYPE_VHD:
- vhdutil.setHidden(self.path, hidden)
+ self.sr._vhdutil.set_hidden(self.path, hidden)
else:
self._linstor.update_volume_metadata(self.uuid, {
HIDDEN_TAG: hidden
@@ -1739,25 +2195,14 @@ def _prepare_thin(self, attach):
else:
fn = 'attach' if attach else 'detach'
- # We assume the first pool is always the one currently in use.
- pools = self.session.xenapi.pool.get_all()
- master = self.session.xenapi.pool.get_master(pools[0])
+ master = util.get_master_ref(self.session)
+
args = {
'groupName': self.sr._group_name,
'srUuid': self.sr.uuid,
'vdiUuid': self.uuid
}
- ret = self.session.xenapi.host.call_plugin(
- master, self.sr.MANAGER_PLUGIN, fn, args
- )
- util.SMlog(
- 'call-plugin ({} with {}) returned: {}'.format(fn, args, ret)
- )
- if ret == 'False':
- raise xs_errors.XenError(
- 'VDIUnavailable',
- opterr='Plugin {} failed'.format(self.sr.MANAGER_PLUGIN)
- )
+ self.sr._exec_manager_command(master, fn, args, 'VDIUnavailable')
# Reload size attrs after inflate or deflate!
self._load_this()
@@ -1807,9 +2252,7 @@ def _determine_type_and_path(self):
'VDIUnavailable',
opterr='failed to get vdi_type in metadata'
)
- self._update_device_name(
- self._linstor.get_volume_name(self.uuid)
- )
+ self._update_device_name(self._linstor.get_volume_name(self.uuid))
def _update_device_name(self, device_name):
self._device_name = device_name
@@ -1832,7 +2275,7 @@ def _create_snapshot(self, snap_uuid, snap_of_uuid=None):
# 2. Write the snapshot content.
is_raw = (self.vdi_type == vhdutil.VDI_TYPE_RAW)
- vhdutil.snapshot(
+ self.sr._vhdutil.snapshot(
snap_path, self.path, is_raw, self.MAX_METADATA_VIRT_SIZE
)
@@ -1862,7 +2305,7 @@ def _create_snapshot(self, snap_uuid, snap_of_uuid=None):
volume_info = self._linstor.get_volume_info(snap_uuid)
snap_vdi.size = self.sr._vhdutil.get_size_virt(snap_uuid)
- snap_vdi.utilisation = volume_info.physical_size
+ snap_vdi.utilisation = volume_info.allocated_size
# 6. Update sm config.
snap_vdi.sm_config = {}
@@ -1932,6 +2375,9 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None):
elif depth >= vhdutil.MAX_CHAIN_SIZE:
raise xs_errors.XenError('SnapshotChainTooLong')
+ # Ensure we have a valid path if we don't have a local diskful.
+ self.sr._linstor.get_device_path(self.uuid)
+
volume_path = self.path
if not util.pathexists(volume_path):
raise xs_errors.XenError(
@@ -2057,7 +2503,7 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None):
raise
if snap_type != VDI.SNAPSHOT_INTERNAL:
- self.sr._update_stats(self.capacity)
+ self.sr._update_stats(self.size)
# 10. Return info on the new user-visible leaf VDI.
ret_vdi = snap_vdi
@@ -2088,10 +2534,318 @@ def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None):
return ret_vdi.get_params()
+ @staticmethod
+ def _start_persistent_http_server(volume_name):
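+        # Serve the local DRBD device over HTTP using http-disk-server
+        # (port 8076 for the HA statefile, 8077 for the redo log).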
+ pid_path = None
+ http_server = None
+
+ try:
+ if volume_name == 'xcp-persistent-ha-statefile':
+ port = '8076'
+ else:
+ port = '8077'
+
+ try:
+ # Use a timeout call because XAPI may be unusable on startup
+ # or if the host has been ejected. So in this case the call can
+ # block indefinitely.
+ session = util.timeout_call(5, util.get_localAPI_session)
+ host_ip = util.get_this_host_address(session)
+ except:
+ # Fallback using the XHA file if session not available.
+ host_ip, _ = get_ips_from_xha_config_file()
+ if not host_ip:
+ raise Exception(
+ 'Cannot start persistent HTTP server: no XAPI session, nor XHA config file'
+ )
+
+ arguments = [
+ 'http-disk-server',
+ '--disk',
+ '/dev/drbd/by-res/{}/0'.format(volume_name),
+ '--ip',
+ host_ip,
+ '--port',
+ port
+ ]
+
+ util.SMlog('Starting {} on port {}...'.format(arguments[0], port))
+ http_server = subprocess.Popen(
+ [FORK_LOG_DAEMON] + arguments,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ # Ensure we use another group id to kill this process without
+                # touching the current one.
+ preexec_fn=os.setsid
+ )
+
+ pid_path = '/run/http-server-{}.pid'.format(volume_name)
+ with open(pid_path, 'w') as pid_file:
+ pid_file.write(str(http_server.pid))
+
+ reg_server_ready = re.compile("Server ready!$")
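+            # Wait for the 'Server ready!' line on the daemon's stdout.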
+ def is_ready():
+ while http_server.poll() is None:
+ line = http_server.stdout.readline()
+ if reg_server_ready.search(line):
+ return True
+ return False
+ try:
+ if not util.timeout_call(10, is_ready):
+                    raise Exception('Failed to wait for HTTP server startup: bad output')
+ except util.TimeoutException:
+                raise Exception('Failed to wait for HTTP server startup within the given delay')
+ except Exception as e:
+ if pid_path:
+ try:
+ os.remove(pid_path)
+ except Exception:
+ pass
+
+ if http_server:
+ # Kill process and children in this case...
+ try:
+ os.killpg(os.getpgid(http_server.pid), signal.SIGTERM)
+ except:
+ pass
+
+ raise xs_errors.XenError(
+ 'VDIUnavailable',
+ opterr='Failed to start http-server: {}'.format(e)
+ )
+
+ def _start_persistent_nbd_server(self, volume_name):
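+        # Start nbd-http-server: it attaches a local /dev/nbdX device backed by
+        # the HTTP endpoints of the pool hosts, and self.path is symlinked to it.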
+ pid_path = None
+ nbd_path = None
+ nbd_server = None
+
+ try:
+ # We use a precomputed device size.
+ # So if the XAPI is modified, we must update these values!
+ if volume_name == 'xcp-persistent-ha-statefile':
+ # See: https://github.com/xapi-project/xen-api/blob/703479fa448a8d7141954bb6e8964d8e25c4ac2e/ocaml/xapi/xha_statefile.ml#L32-L37
+ port = '8076'
+ device_size = 4 * 1024 * 1024
+ else:
+ # See: https://github.com/xapi-project/xen-api/blob/703479fa448a8d7141954bb6e8964d8e25c4ac2e/ocaml/database/redo_log.ml#L41-L44
+ port = '8077'
+ device_size = 256 * 1024 * 1024
+
+ try:
+ session = util.timeout_call(5, util.get_localAPI_session)
+ ips = util.get_host_addresses(session)
+ except Exception as e:
+ _, ips = get_ips_from_xha_config_file()
+ if not ips:
+ raise Exception(
+ 'Cannot start persistent NBD server: no XAPI session, nor XHA config file ({})'.format(e)
+ )
+ ips = ips.values()
+
+ arguments = [
+ 'nbd-http-server',
+ '--socket-path',
+ '/run/{}.socket'.format(volume_name),
+ '--nbd-name',
+ volume_name,
+ '--urls',
+ ','.join(map(lambda ip: 'http://' + ip + ':' + port, ips)),
+ '--device-size',
+ str(device_size)
+ ]
+
+ util.SMlog('Starting {} using port {}...'.format(arguments[0], port))
+ nbd_server = subprocess.Popen(
+ [FORK_LOG_DAEMON] + arguments,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ # Ensure we use another group id to kill this process without
+                # touching the current one.
+ preexec_fn=os.setsid
+ )
+
+ pid_path = '/run/nbd-server-{}.pid'.format(volume_name)
+ with open(pid_path, 'w') as pid_file:
+ pid_file.write(str(nbd_server.pid))
+
+ reg_nbd_path = re.compile("NBD `(/dev/nbd[0-9]+)` is now attached.$")
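+            # Parse the '/dev/nbdX' device path announced by the server on stdout.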
+ def get_nbd_path():
+ while nbd_server.poll() is None:
+ line = nbd_server.stdout.readline()
+ match = reg_nbd_path.search(line)
+ if match:
+ return match.group(1)
+ # Use a timeout to never block the smapi if there is a problem.
+ try:
+ nbd_path = util.timeout_call(10, get_nbd_path)
+ if nbd_path is None:
+ raise Exception('Empty NBD path (NBD server is probably dead)')
+ except util.TimeoutException:
+ raise Exception('Unable to read NBD path')
+
+ util.SMlog('Create symlink: {} -> {}'.format(self.path, nbd_path))
+ os.symlink(nbd_path, self.path)
+ except Exception as e:
+ if pid_path:
+ try:
+ os.remove(pid_path)
+ except Exception:
+ pass
+
+ if nbd_path:
+ try:
+ os.remove(nbd_path)
+ except Exception:
+ pass
+
+ if nbd_server:
+ # Kill process and children in this case...
+ try:
+ os.killpg(os.getpgid(nbd_server.pid), signal.SIGTERM)
+ except:
+ pass
+
+ raise xs_errors.XenError(
+ 'VDIUnavailable',
+ opterr='Failed to start nbd-server: {}'.format(e)
+ )
+
+ @classmethod
+ def _kill_persistent_server(self, type, volume_name, sig):
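+        # Read the pid file written when the server was started, kill the whole
+        # process group, then remove the pid file.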
+ try:
+ path = '/run/{}-server-{}.pid'.format(type, volume_name)
+ if not os.path.exists(path):
+ return
+
+ pid = None
+ with open(path, 'r') as pid_file:
+ try:
+ pid = int(pid_file.read())
+ except Exception:
+ pass
+
+ if pid is not None and util.check_pid_exists(pid):
+ util.SMlog('Kill {} server {} (pid={})'.format(type, path, pid))
+ try:
+ os.killpg(os.getpgid(pid), sig)
+ except Exception as e:
+ util.SMlog('Failed to kill {} server: {}'.format(type, e))
+
+ os.remove(path)
+ except:
+ pass
+
+ @classmethod
+ def _kill_persistent_http_server(self, volume_name, sig=signal.SIGTERM):
+        return self._kill_persistent_server('http', volume_name, sig)
+
+ @classmethod
+ def _kill_persistent_nbd_server(self, volume_name, sig=signal.SIGTERM):
+        return self._kill_persistent_server('nbd', volume_name, sig)
+
+ def _check_http_nbd_volume_name(self):
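+        # self.path has the form '/dev/http-nbd/<volume-name>': strip the prefix.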
+ volume_name = self.path[14:]
+ if volume_name not in [
+ 'xcp-persistent-ha-statefile', 'xcp-persistent-redo-log'
+ ]:
+ raise xs_errors.XenError(
+ 'VDIUnavailable',
+ opterr='Unsupported path: {}'.format(self.path)
+ )
+ return volume_name
+
+ def _attach_using_http_nbd(self):
+ volume_name = self._check_http_nbd_volume_name()
+
+ # Ensure there is no NBD and HTTP server running.
+ self._kill_persistent_nbd_server(volume_name)
+ self._kill_persistent_http_server(volume_name)
+
+ # 0. Fetch drbd path.
+ must_get_device_path = True
+ if not self.sr._is_master:
+ # We are on a slave, we must try to find a diskful locally.
+ try:
+ volume_info = self._linstor.get_volume_info(self.uuid)
+ except Exception as e:
+ raise xs_errors.XenError(
+ 'VDIUnavailable',
+ opterr='Cannot get volume info of {}: {}'
+ .format(self.uuid, e)
+ )
+
+ hostname = socket.gethostname()
+ must_get_device_path = hostname in volume_info.diskful
+
+ drbd_path = None
+ if must_get_device_path or self.sr._is_master:
+            # If we are the master, we must ensure we have a diskless or
+            # diskful volume available to init HA.
+            # It also avoids this error in xensource.log
+ # (/usr/libexec/xapi/cluster-stack/xhad/ha_set_pool_state):
+ # init exited with code 8 [stdout = ''; stderr = 'SF: failed to write in State-File \x10 (fd 4208696). (sys 28)\x0A']
+ # init returned MTC_EXIT_CAN_NOT_ACCESS_STATEFILE (State-File is inaccessible)
+ available = False
+ try:
+ drbd_path = self._linstor.get_device_path(self.uuid)
+ available = util.pathexists(drbd_path)
+ except Exception:
+ pass
+
+ if not available:
+ raise xs_errors.XenError(
+ 'VDIUnavailable',
+ opterr='Cannot get device path of {}'.format(self.uuid)
+ )
+
+ # 1. Prepare http-nbd folder.
+ try:
+ if not os.path.exists('/dev/http-nbd/'):
+ os.makedirs('/dev/http-nbd/')
+ elif os.path.islink(self.path):
+ os.remove(self.path)
+ except OSError as e:
+ if e.errno != errno.EEXIST:
+ raise xs_errors.XenError(
+ 'VDIUnavailable',
+ opterr='Cannot prepare http-nbd: {}'.format(e)
+ )
+
+ # 2. Start HTTP service if we have a diskful or if we are master.
+ http_service = None
+ if drbd_path:
+ assert(drbd_path in (
+ '/dev/drbd/by-res/xcp-persistent-ha-statefile/0',
+ '/dev/drbd/by-res/xcp-persistent-redo-log/0'
+ ))
+ self._start_persistent_http_server(volume_name)
+
+ # 3. Start NBD server in all cases.
+ try:
+ self._start_persistent_nbd_server(volume_name)
+ except Exception as e:
+ if drbd_path:
+ self._kill_persistent_http_server(volume_name)
+ raise
+
+ self.attached = True
+ return VDI.VDI.attach(self, self.sr.uuid, self.uuid)
+
+ def _detach_using_http_nbd(self):
+ volume_name = self._check_http_nbd_volume_name()
+ self._kill_persistent_nbd_server(volume_name)
+ self._kill_persistent_http_server(volume_name)
+
# ------------------------------------------------------------------------------
if __name__ == '__main__':
- SRCommand.run(LinstorSR, DRIVER_INFO)
+ def run():
+ SRCommand.run(LinstorSR, DRIVER_INFO)
+
+ if not TRACE_PERFS:
+ run()
+ else:
+ util.make_profile('LinstorSR', run)
else:
SR.registerSR(LinstorSR)
diff --git a/drivers/blktap2.py b/drivers/blktap2.py
index 3a419aadf..7b4735636 100755
--- a/drivers/blktap2.py
+++ b/drivers/blktap2.py
@@ -49,6 +49,12 @@
from xmlrpc.client import ServerProxy, Transport
from socket import socket, AF_UNIX, SOCK_STREAM
+try:
+ from linstorvolumemanager import log_drbd_openers
+ LINSTOR_AVAILABLE = True
+except ImportError:
+ LINSTOR_AVAILABLE = False
+
PLUGIN_TAP_PAUSE = "tapdisk-pause"
SOCKPATH = "/var/xapi/xcp-rrdd"
@@ -811,7 +817,22 @@ def launch_on_tap(cls, blktap, path, _type, options):
TapCtl.attach(pid, minor)
try:
- TapCtl.open(pid, minor, _type, path, options)
+ retry_open = 0
+ while True:
+ try:
+ TapCtl.open(pid, minor, _type, path, options)
+ except TapCtl.CommandFailure as e:
+ err = (
+ 'status' in e.info and e.info['status']
+ ) or None
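+                    # A DRBD device can transiently refuse the open (for
+                    # example EROFS while it is not primary yet), so retry a
+                    # few times before giving up.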
+ if err in (errno.EIO, errno.EROFS, errno.EAGAIN):
+ if retry_open < 5:
+ retry_open += 1
+ time.sleep(1)
+ continue
+ if LINSTOR_AVAILABLE and err == errno.EROFS:
+                        log_drbd_openers(path)
+                    # Re-raise the last error if the device never opened.
+                    raise
+                break
try:
tapdisk = cls.__from_blktap(blktap)
node = '/sys/dev/block/%d:%d' % (tapdisk.major(), tapdisk.minor)
diff --git a/drivers/cleanup.py b/drivers/cleanup.py
index fe9f996cd..1fda9f69a 100755
--- a/drivers/cleanup.py
+++ b/drivers/cleanup.py
@@ -53,8 +53,10 @@
try:
from linstorjournaler import LinstorJournaler
from linstorvhdutil import LinstorVhdUtil
- from linstorvolumemanager \
- import LinstorVolumeManager, LinstorVolumeManagerError
+ from linstorvolumemanager import get_controller_uri
+ from linstorvolumemanager import LinstorVolumeManager
+ from linstorvolumemanager import LinstorVolumeManagerError
+
LINSTOR_AVAILABLE = True
except ImportError:
LINSTOR_AVAILABLE = False
@@ -477,7 +479,7 @@ def set_task_status(self, status):
#
# VDI
#
-class VDI:
+class VDI(object):
"""Object representing a VDI of a VHD-based SR"""
POLL_INTERVAL = 1
@@ -760,6 +762,12 @@ def delete(self):
lock.Lock.cleanupAll(self.uuid)
self._clear()
+ def getParent(self):
+ return vhdutil.getParent(self.path, lambda x: x.strip())
+
+ def repair(self, parent):
+ vhdutil.repair(parent)
+
def __str__(self):
strHidden = ""
if self.hidden:
@@ -874,12 +882,14 @@ def _reportCoalesceError(vdi, ce):
xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)
_reportCoalesceError = staticmethod(_reportCoalesceError)
+ def coalesce(self):
+ vhdutil.coalesce(self.path)
+
def _doCoalesceVHD(vdi):
try:
-
startTime = time.time()
vhdSize = vdi.getSizeVHD()
- vhdutil.coalesce(vdi.path)
+ vdi.coalesce()
endTime = time.time()
vdi.sr.recordStorageSpeed(startTime, endTime, vhdSize)
except util.CommandException as ce:
@@ -918,13 +928,12 @@ def _coalesceVHD(self, timeOut):
# Try a repair and reraise the exception
parent = ""
try:
- parent = vhdutil.getParent(self.path, lambda x: x.strip())
- if not self._vdi_is_raw(parent):
- # Repair error is logged and ignored. Error reraised later
- util.SMlog('Coalesce failed on %s, attempting repair on ' \
- 'parent %s' % (self.uuid, parent))
- vhdutil.repair(parent)
- except Exception as e:
+ parent = self.getParent()
+ # Repair error is logged and ignored. Error reraised later
+ util.SMlog('Coalesce failed on %s, attempting repair on ' \
+ 'parent %s' % (self.uuid, parent))
+ self.repair(parent)
+        except Exception as e:
util.SMlog('(error ignored) Failed to repair parent %s ' \
'after failed coalesce on %s, err: %s' %
(parent, self.path, e))
@@ -1509,17 +1518,28 @@ def delete(self):
self.sr.unlock()
VDI.delete(self)
- def pauseVDIs(self, vdiList):
- self.sr._linstor.ensure_volume_list_is_not_locked(
- vdiList, timeout=self.VOLUME_LOCK_TIMEOUT
- )
- return super(VDI).pauseVDIs(vdiList)
+ def validate(self, fast=False):
+ if not self.sr._vhdutil.check(self.uuid, fast=fast):
+ raise util.SMException('VHD {} corrupted'.format(self))
- def _liveLeafCoalesce(self, vdi):
+ def pause(self, failfast=False):
self.sr._linstor.ensure_volume_is_not_locked(
- vdi.uuid, timeout=self.VOLUME_LOCK_TIMEOUT
+ self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT
+ )
+ return super(LinstorVDI, self).pause(failfast)
+
+ def coalesce(self):
+ self.sr._vhdutil.force_coalesce(self.path)
+
+ def getParent(self):
+ return self.sr._vhdutil.get_parent(
+ self.sr._linstor.get_volume_uuid_from_device_path(self.path)
+ )
+
+ def repair(self, parent_uuid):
+ self.sr._vhdutil.force_repair(
+ self.sr._linstor.get_device_path(parent_uuid)
)
- return super(VDI)._liveLeafCoalesce(vdi)
def _relinkSkip(self):
abortFlag = IPCFlag(self.sr.uuid)
@@ -1545,6 +1565,19 @@ def _relinkSkip(self):
blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid)
self.children = []
+ def _setParent(self, parent):
+ self.sr._vhdutil.force_parent(self.path, parent.path)
+ self.parent = parent
+ self.parentUuid = parent.uuid
+ parent.children.append(self)
+ try:
+ self.setConfig(self.DB_VHD_PARENT, self.parentUuid)
+ Util.log("Updated the vhd-parent field for child %s with %s" % \
+ (self.uuid, self.parentUuid))
+ except:
+ Util.log("Failed to update %s with vhd-parent field %s" % \
+ (self.uuid, self.parentUuid))
+
def _setHidden(self, hidden=True):
HIDDEN_TAG = 'hidden'
@@ -1563,7 +1596,7 @@ def _queryVHDBlocks(self):
#
# SR
#
-class SR:
+class SR(object):
class LogFilter:
def __init__(self, sr):
self.sr = sr
@@ -2955,7 +2988,6 @@ def __init__(self, uuid, xapi, createLock, force):
)
SR.__init__(self, uuid, xapi, createLock, force)
- self._master_uri = 'linstor://localhost'
self.path = LinstorVolumeManager.DEV_ROOT_PATH
self._reloadLinstor()
@@ -2982,6 +3014,12 @@ def scan(self, force=False):
self.logFilter.logState()
self._handleInterruptedCoalesceLeaf()
+ def pauseVDIs(self, vdiList):
+ self._linstor.ensure_volume_list_is_not_locked(
+ vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
+ )
+ return super(LinstorSR, self).pauseVDIs(vdiList)
+
def _reloadLinstor(self):
session = self.xapi.session
host_ref = util.get_this_host_ref(session)
@@ -2994,12 +3032,13 @@ def _reloadLinstor(self):
dconf = session.xenapi.PBD.get_device_config(pbd)
group_name = dconf['group-name']
+ controller_uri = get_controller_uri()
self.journaler = LinstorJournaler(
- self._master_uri, group_name, logger=util.SMlog
+ controller_uri, group_name, logger=util.SMlog
)
self._linstor = LinstorVolumeManager(
- self._master_uri,
+ controller_uri,
group_name,
repair=True,
logger=util.SMlog
@@ -3032,8 +3071,8 @@ def _load_vdi_info(self):
# TODO: Ensure metadata contains the right info.
- all_volume_info = self._linstor.volumes_with_info
- volumes_metadata = self._linstor.volumes_with_metadata
+ all_volume_info = self._linstor.get_volumes_with_info()
+ volumes_metadata = self._linstor.get_volumes_with_metadata()
for vdi_uuid, volume_info in all_volume_info.items():
try:
if not volume_info.name and \
@@ -3048,7 +3087,7 @@ def _load_vdi_info(self):
except Exception as e:
Util.log(
' [VDI {}: failed to load VDI info]: {}'
- .format(self.uuid, e)
+ .format(vdi_uuid, e)
)
info = vhdutil.VHDInfo(vdi_uuid)
info.error = 1
@@ -3064,8 +3103,10 @@ def _calcExtraSpaceNeeded(self, child, parent):
virtual_size = LinstorVolumeManager.round_up_volume_size(
parent.sizeVirt + meta_overhead + bitmap_overhead
)
- # TODO: Check result.
- return virtual_size - self._linstor.get_volume_size(parent.uuid)
+ volume_size = self._linstor.get_volume_size(parent.uuid)
+
+ assert virtual_size >= volume_size
+ return virtual_size - volume_size
def _hasValidDevicePath(self, uuid):
try:
@@ -3075,6 +3116,16 @@ def _hasValidDevicePath(self, uuid):
return False
return True
+ def _liveLeafCoalesce(self, vdi):
+ self.lock()
+ try:
+ self._linstor.ensure_volume_is_not_locked(
+ vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
+ )
+ return super(LinstorSR, self)._liveLeafCoalesce(vdi)
+ finally:
+ self.unlock()
+
def _handleInterruptedCoalesceLeaf(self):
entries = self.journaler.get_all(VDI.JRN_LEAF)
for uuid, parentUuid in entries.items():
@@ -3101,7 +3152,6 @@ def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
'Renaming parent back: {} -> {}'.format(childUuid, parentUuid)
)
parent.rename(parentUuid)
- util.fistpoint.activate('LVHDRT_coaleaf_undo_after_rename', self.uuid)
child = self.getVDI(childUuid)
if not child:
@@ -3117,9 +3167,6 @@ def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
Util.log('Updating the VDI record')
child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)
- util.fistpoint.activate(
- 'LVHDRT_coaleaf_undo_after_rename2', self.uuid
- )
# TODO: Maybe deflate here.
@@ -3128,10 +3175,7 @@ def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
if not parent.hidden:
parent._setHidden(True)
self._updateSlavesOnUndoLeafCoalesce(parent, child)
- util.fistpoint.activate('LVHDRT_coaleaf_undo_end', self.uuid)
Util.log('*** leaf-coalesce undo successful')
- if util.fistpoint.is_active('LVHDRT_coaleaf_stop_after_recovery'):
- child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
Util.log('*** FINISH LEAF-COALESCE')
@@ -3144,32 +3188,21 @@ def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
except XenAPI.Failure:
pass
self._updateSlavesOnResize(vdi)
- util.fistpoint.activate('LVHDRT_coaleaf_finish_end', self.uuid)
Util.log('*** finished leaf-coalesce successfully')
def _checkSlaves(self, vdi):
try:
- states = self._linstor.get_usage_states(vdi.uuid)
- for node_name, state in states.items():
- self._checkSlave(node_name, vdi, state)
+ all_openers = self._linstor.get_volume_openers(vdi.uuid)
+ for openers in all_openers.itervalues():
+ for opener in openers.values():
+ if opener['process-name'] != 'tapdisk':
+ raise util.SMException(
+ 'VDI {} is in use: {}'.format(vdi.uuid, all_openers)
+ )
except LinstorVolumeManagerError as e:
if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
raise
- @staticmethod
- def _checkSlave(node_name, vdi, state):
- # If state is None, LINSTOR doesn't know the host state
- # (bad connection?).
- if state is None:
- raise util.SMException(
- 'Unknown state for VDI {} on {}'.format(vdi.uuid, node_name)
- )
-
- if state:
- raise util.SMException(
- 'VDI {} is in use on {}'.format(vdi.uuid, node_name)
- )
-
################################################################################
#
diff --git a/drivers/linstor-manager b/drivers/linstor-manager
index f7ce18099..9e96aacac 100755
--- a/drivers/linstor-manager
+++ b/drivers/linstor-manager
@@ -14,32 +14,52 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
+# We must modify the default import path: we don't want to import modules
+# installed in the plugins folder, we must import them from the LINSTOR
+# driver folder instead.
+import sys
+sys.path[0] = '/opt/xensource/sm/'
+
import base64
import distutils.util
-import subprocess
-import sys
+import os
+import socket
+import XenAPI
import XenAPIPlugin
-sys.path.append('/opt/xensource/sm/')
from linstorjournaler import LinstorJournaler
-from linstorvolumemanager import LinstorVolumeManager
+from linstorvolumemanager import get_controller_uri, get_local_volume_openers, LinstorVolumeManager
from lock import Lock
import json
import LinstorSR
+import re
import util
import vhdutil
+BACKING_DISK_RE = re.compile('^/dev/([^/]+)/(?:[^/]+)$')
+LVM_PLUGIN = 'lvm.py'
+THIN_POOL = 'thin_pool'
FIREWALL_PORT_SCRIPT = '/etc/xapi.d/plugins/firewall-port'
-LINSTOR_PORTS = [3366, 3370, 3376, 3377, '7000:8000']
+LINSTOR_PORTS = [3366, 3370, 3376, 3377, 8076, 8077]
+DRBD_PORTS = '7000:8000'
+
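+# drbd-reactor promoter configuration: when this node becomes DRBD primary
+# for the shared database resource, start the units that mount the database
+# volume (var-lib-linstor.service) and run the LINSTOR controller.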
+DRBD_REACTOR_CONF = '/etc/drbd-reactor.d/sm-linstor.toml'
+DRBD_REACTOR_CONF_CONTENT = """[[promoter]]
+[promoter.resources.xcp-persistent-database]
+start = [ "var-lib-linstor.service", "linstor-controller.service" ]
+"""
+DRBD_REACTOR_DEPS = [
+    '/run/systemd/system/linstor-controller.service.d/reactor.conf',
+    '/run/systemd/system/var-lib-linstor.service.d/reactor.conf'
+]
-def get_linstor_uri(session):
-    return 'linstor://{}'.format(util.get_master_rec(session)['address'])
-def update_port(port, open):
-    fn = 'open' if open else 'close'
+
+def update_linstor_port(port, open_ports):
+ fn = 'open' if open_ports else 'close'
args = (
FIREWALL_PORT_SCRIPT, fn, str(port), 'tcp'
)
@@ -50,28 +70,238 @@ def update_port(port, open):
raise Exception('Failed to {} port: {} {}'.format(fn, out, err))
-def update_all_ports(open):
- for port in LINSTOR_PORTS:
- update_port(port, open)
+def has_iptables_rule(rule):
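+    # `iptables -C` exits with 0 when the rule already exists.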
+ (ret, stdout, stderr) = util.doexec(['iptables', '-C'] + rule)
+ return not ret
-def update_service(start):
- fn = 'enable' if start else 'disable'
- args = ('systemctl', fn, '--now', 'linstor-satellite')
- (ret, out, err) = util.doexec(args)
- if ret == 0:
+def update_drbd_ports(open_ports):
+    # We want a static rule for the DRBD port range, so we can't use the
+    # XAPI firewall port script; we have to check manually for existing
+    # rules before updating the iptables service.
+ rule = ['INPUT', '-p', 'tcp', '--dport', DRBD_PORTS, '-j', 'ACCEPT']
+ if open_ports == has_iptables_rule(rule):
return
- raise Exception('Failed to {} satellite: {} {}'.format(fn, out, err))
+ if open_ports:
+ rule.insert(1, '1')
+ (ret, stdout, stderr) = util.doexec(['iptables', '-I'] + rule)
+ if ret:
+ raise Exception('Failed to add DRBD rule: {}'.format(stderr))
+ else:
+ (ret, stdout, stderr) = util.doexec(['iptables', '-D'] + rule)
+ if ret:
+ raise Exception('Failed to remove DRBD rule: {}'.format(stderr))
+ (ret, stdout, stderr) = util.doexec(['service', 'iptables', 'save'])
+ if ret:
+ raise Exception('Failed to save DRBD rule: {}'.format(stderr))
+
+
+def update_all_ports(open_ports):
+ for port in LINSTOR_PORTS:
+ update_linstor_port(port, open_ports)
+ update_drbd_ports(open_ports)
+
+
+def update_linstor_satellite_service(start):
+ service = 'linstor-satellite'
+
+ # Stop services in all cases first.
+ # Ensure we don't have an invalid cache used by a satellite.
+    # (We hit an issue with a newly added disk that reused a volume group
+    # name formerly used by another disk. To avoid this kind of problem, we
+    # always restart the satellite.)
+ util.enable_and_start_service(service, False)
+ if start:
+ util.enable_and_start_service(service, True)
+
+
+def update_drbd_reactor_service(start):
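+    # Write (or remove) the promoter config, stop drbd-reactor and the
+    # controller units, drop the runtime overrides generated by drbd-reactor,
+    # then enable or disable the daemon.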
+ if start:
+ util.atomicFileWrite(DRBD_REACTOR_CONF, None, DRBD_REACTOR_CONF_CONTENT)
+ else:
+ try:
+ os.remove(DRBD_REACTOR_CONF)
+ except Exception:
+ pass
+
+ util.stop_service('drbd-reactor')
+
+ try:
+        util.stop_service('drbd-promote@xcp\\x2dpersistent\\x2ddatabase.service')
+    except Exception as e:
+        # Ignore the error if the unit is simply not loaded.
+        if not str(e).rstrip().endswith(' not loaded.'):
+            raise e
+
+ util.stop_service('linstor-controller')
+ util.stop_service('var-lib-linstor.service')
+
+ for dep in DRBD_REACTOR_DEPS:
+ try:
+ os.remove(dep)
+ except Exception:
+ pass
+
+ util.doexec(['systemctl', 'daemon-reload'])
+ util.enable_and_start_service('drbd-reactor', start)
+
+
+def exec_create_sr(session, name, description, disks, volume_group, redundancy, provisioning, force):
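+    # For each host: optionally wipe an existing volume group (force mode),
+    # then create the PV/VG (and thin pool if requested); finally create the
+    # LINSTOR SR on the pool master.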
+ disk_hostnames = disks.keys()
+ thin = provisioning == 'thin'
+
+ # Create volumes.
+ hosts = session.xenapi.host.get_all_records()
+ hostnames = []
+ for host_ref, host_record in hosts.items():
+ hostname = host_record['hostname']
+ hostnames.append(hostname)
+
+ if force:
+ try:
+ session.xenapi.host.call_plugin(
+ host_ref, LVM_PLUGIN, 'destroy_volume_group', {
+ 'vg_name': volume_group,
+ 'force': 'True'
+ }
+ )
+ except Exception as e:
+ try:
+ response = session.xenapi.host.call_plugin(
+ host_ref, LVM_PLUGIN, 'list_volume_groups', {
+ 'vg_name': volume_group
+ }
+ )
+ if response != '{}':
+ raise e
+ except Exception:
+ raise e
+
+ if hostname not in disk_hostnames or not disks[hostname]:
+ if force or session.xenapi.host.call_plugin(
+ host_ref, LVM_PLUGIN, 'list_volume_groups', {
+ 'vg_name': volume_group
+ }
+ ) == '{}':
+ continue
+ raise Exception('Volume group should not exist on `{}`, you must remove it manually'.format(hostname))
+
+ host_disks = disks[hostname]
+ if type(host_disks) is list:
+ host_disks = ','.join(disks[hostname])
+ else:
+ raise Exception('Disk value of `{}` must be a disk list'.format(hostname))
+
+ session.xenapi.host.call_plugin(
+ host_ref, LVM_PLUGIN, 'create_physical_volume', {
+ 'devices': host_disks,
+ 'force': str(force)
+ }
+ )
+
+ session.xenapi.host.call_plugin(
+ host_ref, LVM_PLUGIN, 'create_volume_group', {
+ 'vg_name': volume_group,
+ 'devices': host_disks
+ }
+ )
+
+ if thin:
+ session.xenapi.host.call_plugin(
+ host_ref, LVM_PLUGIN, 'create_thin_pool', {
+ 'vg_name': volume_group,
+ 'lv_name': THIN_POOL
+ }
+ )
+
+ # Create SR.
+ master_ref = session.xenapi.pool.get_all_records().values()[0]['master']
+
+ device_config = {
+ 'redundancy': str(redundancy),
+ 'provisioning': 'thin' if thin else 'thick',
+ 'group-name': '{}/{}'.format(volume_group, THIN_POOL) if thin else volume_group,
+ 'hosts': ','.join(hostnames),
+ 'monitor-db-quorum': str(len(hostnames) > 2)
+ }
+ sr_ref = session.xenapi.SR.create(
+ master_ref, device_config, '0', name, description, 'linstor', '', True, {}
+ )
+ return session.xenapi.SR.get_uuid(sr_ref)
+
+
+def get_drbd_volumes(volume_group=None):
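+    # Build a dict mapping volume group name -> list of DRBD minor numbers,
+    # optionally limited to a single volume group.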
+ drbd_volumes = {}
+ (ret, stdout, stderr) = util.doexec(['drbdsetup', 'show', '--json'])
+ if ret:
+ raise Exception('Failed to get JSON object: {}'.format(stderr))
+
+ config = json.loads(stdout)
+ for resource in config:
+ for volume in resource['_this_host']['volumes']:
+ backing_disk = volume['backing-disk']
+ match = BACKING_DISK_RE.match(backing_disk)
+ if not match:
+ continue
+
+ cur_volume_group = match.groups()[0]
+ if volume_group and cur_volume_group != volume_group:
+ continue
+
+ minor = int(volume['device_minor'])
+ if cur_volume_group in drbd_volumes:
+ drbd_volumes[cur_volume_group].append(minor)
+ else:
+ drbd_volumes[cur_volume_group] = [minor]
+ return drbd_volumes
+
+
+def force_destroy_drbd_volume(minor):
+ (ret, stdout, stderr) = util.doexec(['drbdsetup', 'detach', minor, '--force'])
+ if ret:
+ raise Exception('Failed to detach volume: {}'.format(stderr))
+ (ret, stdout, stderr) = util.doexec(['drbdsetup', 'del-minor', minor])
+ if ret:
+ raise Exception('Failed to destroy volume: {}'.format(stderr))
+
+# ------------------------------------------------------------------------------
+
+
+def prepare_sr(session, args):
+ try:
+ LinstorSR.activate_lvm_group(args['groupName'])
+
+ update_all_ports(open_ports=True)
+        # We don't want to enable and start the drbd-reactor daemon during
+        # SR creation.
+ update_drbd_reactor_service(start=False)
+ update_linstor_satellite_service(start=True)
+ return str(True)
+ except Exception as e:
+ util.SMlog('linstor-manager:prepare_sr error: {}'.format(e))
+ return str(False)
+
+
+def release_sr(session, args):
+ try:
+ update_linstor_satellite_service(start=False)
+ update_drbd_reactor_service(start=False)
+ update_all_ports(open_ports=False)
+ return str(True)
+ except Exception as e:
+ util.SMlog('linstor-manager:release_sr error: {}'.format(e))
+ return str(False)
-def enable(session, args):
+def update_drbd_reactor(session, args):
try:
enabled = distutils.util.strtobool(args['enabled'])
- update_all_ports(open=enabled)
- update_service(start=enabled)
+ update_drbd_reactor_service(start=enabled)
return str(True)
except Exception as e:
- util.SMlog('linstor-manager:disable error: {}'.format(e))
+ util.SMlog(
+ 'linstor-manager:update_drbd_reactor error: {}'.format(e)
+ )
return str(False)
@@ -81,12 +311,12 @@ def attach(session, args):
vdi_uuid = args['vdiUuid']
group_name = args['groupName']
- linstor_uri = get_linstor_uri(session)
+ controller_uri = get_controller_uri()
journaler = LinstorJournaler(
- linstor_uri, group_name, logger=util.SMlog
+ controller_uri, group_name, logger=util.SMlog
)
linstor = LinstorVolumeManager(
- linstor_uri,
+ controller_uri,
group_name,
logger=util.SMlog
)
@@ -104,7 +334,7 @@ def detach(session, args):
group_name = args['groupName']
linstor = LinstorVolumeManager(
- get_linstor_uri(session),
+ get_controller_uri(),
group_name,
logger=util.SMlog
)
@@ -115,10 +345,37 @@ def detach(session, args):
return str(False)
+def destroy(session, args):
+ try:
+ group_name = args['groupName']
+
+        # When destroy is called, no drbd-reactor daemon is running, so the
+        # controllers are stopped too; we must start an instance ourselves.
+ util.restart_service('var-lib-linstor.service')
+ util.restart_service('linstor-controller')
+
+ linstor = LinstorVolumeManager(
+ 'linstor://localhost',
+ group_name,
+ logger=util.SMlog
+ )
+ linstor.destroy()
+ return str(True)
+ except Exception as e:
+ util.stop_service('linstor-controller')
+ util.stop_service('var-lib-linstor.service')
+ util.SMlog('linstor-manager:destroy error: {}'.format(e))
+ return str(False)
+
+
def check(session, args):
try:
device_path = args['devicePath']
- return str(vhdutil.check(device_path))
+ ignore_missing_footer = distutils.util.strtobool(
+ args['ignoreMissingFooter']
+ )
+ fast = distutils.util.strtobool(args['fast'])
+ return str(vhdutil.check(device_path, ignore_missing_footer, fast))
except Exception as e:
util.SMlog('linstor-manager:check error: {}'.format(e))
raise
@@ -131,7 +388,7 @@ def get_vhd_info(session, args):
include_parent = distutils.util.strtobool(args['includeParent'])
linstor = LinstorVolumeManager(
- get_linstor_uri(session),
+ get_controller_uri(),
group_name,
logger=util.SMlog
)
@@ -143,7 +400,7 @@ def get_vhd_info(session, args):
)
vhd_info = vhdutil.getVHDInfo(
- device_path, extract_uuid, include_parent
+ device_path, extract_uuid, include_parent, False
)
return json.dumps(vhd_info.__dict__)
except Exception as e:
@@ -166,7 +423,7 @@ def get_parent(session, args):
group_name = args['groupName']
linstor = LinstorVolumeManager(
- get_linstor_uri(session),
+ get_controller_uri(),
group_name,
logger=util.SMlog
)
@@ -228,6 +485,37 @@ def get_block_bitmap(session, args):
raise
+def set_parent(session, args):
+ try:
+ device_path = args['devicePath']
+ parent_path = args['parentPath']
+ vhdutil.setParent(device_path, parent_path, False)
+ return ''
+ except Exception as e:
+ util.SMlog('linstor-manager:set_parent error: {}'.format(e))
+ raise
+
+
+def coalesce(session, args):
+ try:
+ device_path = args['devicePath']
+ vhdutil.coalesce(device_path)
+ return ''
+ except Exception as e:
+ util.SMlog('linstor-manager:coalesce error: {}'.format(e))
+ raise
+
+
+def repair(session, args):
+ try:
+ device_path = args['devicePath']
+ vhdutil.repair(device_path)
+ return ''
+ except Exception as e:
+ util.SMlog('linstor-manager:repair error: {}'.format(e))
+ raise
+
+
def lock_vdi(session, args):
lock = None
try:
@@ -236,10 +524,13 @@ def lock_vdi(session, args):
group_name = args['groupName']
locked = distutils.util.strtobool(args['locked'])
+ # We must lock to mark the VDI.
lock = Lock(vhdutil.LOCK_TYPE_SR, sr_uuid)
+ if locked:
+ lock.acquire()
linstor = LinstorVolumeManager(
- get_linstor_uri(session),
+ get_controller_uri(),
group_name,
logger=util.SMlog
)
@@ -249,16 +540,523 @@ def lock_vdi(session, args):
except Exception as e:
util.SMlog('linstor-manager:lock_vdi error: {}'.format(e))
finally:
- if lock:
+ if locked and lock:
lock.release()
return str(False)
+def has_controller_running(session, args):
+ (ret, stdout, stderr) = util.doexec([
+ 'systemctl', 'is-active', '--quiet', 'linstor-controller'
+ ])
+ return str(ret == 0)
+
+
+def add_host(session, args):
+ group_name = args['groupName']
+
+ # 1. Find SR and PBDs.
+ srs = dict()
+ for sr_ref, sr in session.xenapi.SR.get_all_records().items():
+ if sr.get('type') == 'linstor':
+ srs[sr_ref] = sr
+
+ pbds = dict()
+ for pbd_ref, pbd in session.xenapi.PBD.get_all_records().items():
+ device_config = pbd.get('device_config')
+ if (
+ device_config and
+ device_config.get('group-name') == group_name
+ and pbd['SR'] in srs
+ ):
+ pbds[pbd_ref] = pbd
+
+ # 2. Ensure there is at least one PBD and all PBDs are used in
+ # the same SR.
+ if not pbds:
+ raise Exception(
+ 'Failed to find PBDs of group `{}`'.format(group_name)
+ )
+
+ sr_ref = None
+ for pbd in pbds.values():
+ if not sr_ref:
+ sr_ref = pbd['SR']
+ elif pbd['SR'] != sr_ref:
+ raise Exception(
+ 'Group `{}` is used by many SRs!'.format(group_name)
+ )
+
+ # 3. Ensure node doesn't exist.
+ linstor = LinstorVolumeManager(
+ get_controller_uri(),
+ group_name,
+ logger=util.SMlog
+ )
+
+ node_name = socket.gethostname()
+ has_node = linstor.has_node(node_name)
+
+ pbd_id = 0
+ new_pbd_ref = None
+
+ try:
+ # 4. Enable services.
+ update_all_ports(open_ports=True)
+ update_drbd_reactor_service(start=True)
+ update_linstor_satellite_service(start=True)
+
+ # 5. Try to create local node.
+ if not has_node:
+ linstor.create_node(node_name, util.get_this_host_address(session))
+
+ # 6. Recreate PBDs.
+ # Use the redundancy given by Linstor instead of smapi config.
+ redundancy = linstor.redundancy
+ default_device_config = None
+ this_host = util.get_this_host_ref(session)
+ create_new_pbd = True
+
+ assert pbds
+ pbds = pbds.items()
+ for pbd_ref, pbd in pbds:
+ device_config = pbd['device_config']
+
+ hosts = filter(
+ lambda host: len(host.strip()),
+                device_config.get('hosts', '').split(',')
+ )
+ hosts.append(node_name)
+ hosts = ','.join(list(set(hosts)))
+
+ # Should be the same on all hosts.
+ provisioning = device_config['provisioning']
+
+ if not default_device_config:
+ default_device_config = {
+ 'group-name': group_name,
+ 'redundancy': redundancy,
+ 'hosts': hosts,
+ 'provisioning': provisioning
+ }
+
+ if pbd['currently_attached']:
+ session.xenapi.PBD.unplug(pbd_ref)
+ session.xenapi.PBD.destroy(pbd_ref)
+ pbd_id += 1
+
+ host = pbd['host']
+ if host == this_host:
+ create_new_pbd = False
+
+ pbd_ref = session.xenapi.PBD.create({
+ 'host': host,
+ 'SR': sr_ref,
+ 'device_config': {
+ 'group-name': group_name,
+ 'redundancy': redundancy,
+ 'hosts': hosts,
+ 'provisioning': provisioning
+ }
+ })
+ try:
+ session.xenapi.PBD.plug(pbd_ref)
+ except Exception as e:
+ util.SMlog('Failed to replug PBD: {}'.format(e))
+
+ # 7. Create new PBD.
+ if create_new_pbd:
+ new_pbd_ref = session.xenapi.PBD.create({
+ 'host': this_host,
+ 'SR': sr_ref,
+ 'device_config': default_device_config
+ })
+ try:
+ session.xenapi.PBD.plug(new_pbd_ref)
+ except Exception as e:
+ util.SMlog('Failed to plug new PBD: {}'.format(e))
+
+ return str(True)
+ except Exception as e:
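+        # Rollback: remove the node if we created it and recreate the PBDs
+        # already destroyed above with their previous device config.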
+ stop_services = not has_node
+ if stop_services:
+ try:
+ linstor.destroy_node(node_name)
+ except Exception:
+ pass
+
+ for pbd_ref, pbd in pbds[:pbd_id]:
+ try:
+ session.xenapi.PBD.unplug(pbd_ref)
+ except Exception:
+ pass
+
+ try:
+ session.xenapi.PBD.destroy(pbd_ref)
+ except Exception:
+ pass
+
+ try:
+ device_config = pbd['device_config']
+ session.xenapi.PBD.create({
+                    'host': pbd['host'],
+ 'SR': sr_ref,
+ 'device_config': {
+ 'group-name': group_name,
+ 'redundancy': redundancy,
+ 'hosts': device_config['hosts'],
+ 'provisioning': device_config['provisioning']
+ }
+ })
+ except Exception as pbd_error:
+ util.SMlog('Failed to recreate PBD: {}'.format(pbd_error))
+ pass
+
+ try:
+ session.xenapi.PBD.plug(pbd_ref)
+ except Exception:
+ pass
+
+ if new_pbd_ref:
+ try:
+ session.xenapi.PBD.unplug(new_pbd_ref)
+ except Exception:
+ pass
+
+ try:
+ session.xenapi.PBD.destroy(new_pbd_ref)
+ except Exception:
+ pass
+
+ try:
+ # If we failed to remove the node, we don't stop services.
+ if stop_services and not linstor.has_node(node_name):
+ update_linstor_satellite_service(start=False)
+ update_drbd_reactor_service(start=False)
+ update_all_ports(open_ports=False)
+ except Exception:
+ pass
+
+ raise e
+
+
+def remove_host(session, args):
+ group_name = args['groupName']
+
+ # 1. Find SRs and PBDs.
+ srs = dict()
+ for sr_ref, sr in session.xenapi.SR.get_all_records().items():
+ if sr.get('type') == 'linstor':
+ srs[sr_ref] = sr
+
+ pbds = dict()
+ for pbd_ref, pbd in session.xenapi.PBD.get_all_records().items():
+ device_config = pbd.get('device_config')
+ if (
+ device_config and
+ device_config.get('group-name') == group_name
+ and pbd['SR'] in srs
+ ):
+ pbds[pbd_ref] = pbd
+
+ # 2. Remove node.
+ linstor = LinstorVolumeManager(
+ get_controller_uri(),
+ group_name,
+ logger=util.SMlog
+ )
+
+ node_name = socket.gethostname()
+ if linstor.has_node(node_name):
+ linstor.destroy_node(node_name)
+ if linstor.has_node(node_name):
+ raise Exception('Failed to remove node! Unknown error.')
+
+ redundancy = linstor.redundancy
+ this_host = util.get_this_host_ref(session)
+
+ # 3. Update PBDs.
+ for pbd_ref, pbd in pbds.items():
+ host = pbd['host']
+ if host == this_host:
+ if pbd['currently_attached']:
+ session.xenapi.PBD.unplug(pbd_ref)
+ session.xenapi.PBD.destroy(pbd_ref)
+ continue
+
+ device_config = pbd['device_config']
+        hosts = device_config.get('hosts', '').split(',')
+ try:
+ hosts.remove(node_name)
+ except Exception as e:
+ continue
+ hosts = ','.join(list(set(hosts)))
+
+ if pbd['currently_attached']:
+ session.xenapi.PBD.unplug(pbd_ref)
+ session.xenapi.PBD.destroy(pbd_ref)
+
+ pbd_ref = session.xenapi.PBD.create({
+ 'host': host,
+ 'SR': pbd['SR'],
+ 'device_config': {
+ 'group-name': group_name,
+ 'redundancy': redundancy,
+ 'hosts': hosts,
+ 'provisioning': device_config['provisioning']
+ }
+ })
+
+ try:
+ session.xenapi.PBD.plug(pbd_ref)
+ except Exception as e:
+ util.SMlog('Failed to replug PBD: {}'.format(e))
+
+ # 3. Stop services.
+ try:
+ update_linstor_satellite_service(start=False)
+ update_drbd_reactor_service(start=False)
+ update_all_ports(open_ports=False)
+ except Exception as e:
+ util.SMlog('Error while stopping services: {}'.format(e))
+ pass
+
+ return str('True')
+
+
+def create_sr(session, args):
+ try:
+        # Contrary to the other functions, parse and validate the arguments
+        # carefully here because this helper is public and easy to misuse.
+ name = args.get('name')
+ if not name:
+ raise Exception('`name` is empty')
+
+ description = args.get('description') or ''
+
+ disks = args.get('disks')
+ if not disks:
+ raise Exception('`disks` is empty')
+ try:
+ disks = json.loads(disks)
+ except Exception as e:
+ raise Exception('failed to decode `disks`: {}'.format(e))
+ if type(disks) is not dict:
+ raise Exception('`disks` must be a JSON object')
+
+ volume_group = args.get('volume_group')
+ if not volume_group:
+ raise Exception('`volume_group` is empty')
+
+ redundancy = args.get('redundancy')
+ if not redundancy:
+ raise Exception('`redundancy` is empty')
+
+ try:
+ redundancy = int(redundancy)
+ except Exception:
+ raise Exception('`redundancy` is not a number')
+
+ provisioning = args.get('provisioning')
+ if not provisioning:
+ provisioning = 'thin'
+ elif provisioning != 'thin' and provisioning != 'thick':
+ raise Exception('unsupported provisioning')
+
+ force = distutils.util.strtobool(args.get('force') or '0')
+
+ return exec_create_sr(
+ session, name, description, disks, volume_group, redundancy, provisioning, force
+ )
+ except Exception as e:
+ util.SMlog('linstor-manager:create_sr error: {}'.format(e))
+ raise
+
+
+def demote_drbd_resource(session, args):
+ try:
+ resource_name = args['resource_name']
+ (ret, stdout, stderr) = util.doexec(['drbdsetup', 'secondary', resource_name])
+ if ret:
+ raise Exception('Failed to demote resource: {}'.format(stderr))
+ return str(True)
+ except Exception as e:
+ util.SMlog('linstor-manager:demote_drbd_resource error: {}'.format(e))
+ return str(False)
+
+
+def list_drbd_volumes(session, args):
+ try:
+ volume_group = args.get('volume_group')
+ return json.dumps(get_drbd_volumes(volume_group))
+ except Exception as e:
+ util.SMlog('linstor-manager:list_drbd_volumes error: {}'.format(e))
+ raise
+
+
+def destroy_drbd_volume(session, args):
+ try:
+ minor = args.get('minor')
+ if not minor:
+ raise Exception('Cannot destroy DRBD volume without minor.')
+ force_destroy_drbd_volume(minor)
+ return str(True)
+ except Exception as e:
+ util.SMlog('linstor-manager:destroy_drbd_volume error: {}'.format(e))
+ return str(False)
+
+
+def destroy_drbd_volumes(session, args):
+ try:
+ volume_group = args.get('volume_group')
+ if not volume_group:
+ raise Exception('Cannot destroy DRBD volumes without volume group.')
+ for minor in get_drbd_volumes(volume_group).get(volume_group, []):
+ force_destroy_drbd_volume(str(minor))
+ return str(True)
+ except Exception as e:
+ util.SMlog('linstor-manager:destroy_drbd_volumes error: {}'.format(e))
+ return str(False)
+
+
+def get_drbd_openers(session, args):
+ try:
+ resource_name = args.get('resourceName')
+ volume = args.get('volume')
+ return get_local_volume_openers(resource_name, volume)
+ except Exception as e:
+ util.SMlog('linstor-manager:get_drbd_openers error: {}'.format(e))
+ raise
+
+
+def health_check(session, args):
+ group_name = args['groupName']
+
+ result = {
+ 'controller-uri': '',
+ 'nodes': {},
+ 'storage-pools': {},
+ 'warnings': [],
+ 'errors': []
+ }
+
+ def format_result():
+ return json.dumps(result)
+
+ # 1. Get controller.
+ try:
+ controller_uri = get_controller_uri()
+
+ result['controller-uri'] = controller_uri
+ try:
+ if controller_uri == 'linstor://localhost':
+ # Replace `localhost` with IP to give a better info for users.
+ result['controller-uri'] = 'linstor://' + util.get_this_host_address(session)
+ except Exception:
+ # Ignore error: can be a XAPI restart or something else.
+ pass
+
+ linstor = LinstorVolumeManager(
+ controller_uri,
+ group_name,
+ logger=util.SMlog
+ )
+ except Exception as e:
+ # Probably a network issue, or offline controller.
+ result['errors'].append('Cannot join SR: `{}`.'.format(e))
+ return format_result()
+
+ try:
+ # 2. Check node statuses.
+ nodes = linstor.get_nodes_info()
+ result['nodes'] = nodes
+ for node_name, status in nodes.items():
+ if status != 'ONLINE':
+ result['warnings'].append('Node `{}` is {}.'.format(node_name, status))
+
+ # 3. Check storage pool statuses.
+ storage_pools_per_node = linstor.get_storage_pools_info()
+ result['storage-pools'] = storage_pools_per_node
+ for node_name, storage_pools in storage_pools_per_node.items():
+ for storage_pool in storage_pools:
+ free_size = storage_pool['free-size']
+ capacity = storage_pool['capacity']
+ if free_size < 0 or capacity <= 0:
+ result['errors'].append(
+ 'Cannot get free size and/or capacity of storage pool `{}`.'
+ .format(storage_pool['uuid'])
+ )
+ elif free_size > capacity:
+ result['errors'].append(
+ 'Free size of storage pool `{}` is greater than capacity.'
+ .format(storage_pool['uuid'])
+ )
+ else:
+ remaining_percent = free_size / float(capacity) * 100.0
+ threshold = 10.0
+ if remaining_percent < threshold:
+ result['warnings'].append(
+ 'Remaining size of storage pool `{}` is below {}% of its capacity.'
+ .format(storage_pool['uuid'], threshold)
+ )
+
+ # 4. Check resource statuses.
+ all_resources = linstor.get_resources_info()
+ result['resources'] = all_resources
+
+ for resource_name, resource_by_node in all_resources.items():
+ for node_name, resource in resource_by_node.items():
+ for volume_index, volume in enumerate(resource['volumes']):
+ disk_state = volume['disk-state']
+ if disk_state in ['UpToDate', 'Created', 'Attached']:
+ continue
+ if disk_state == 'DUnknown':
+ result['warnings'].append(
+ 'Unknown state for volume `{}` at index {} for resource `{}` on node `{}`'
+ .format(volume['device-path'], volume_index, resource_name, node_name)
+ )
+ continue
+ if disk_state in ['Inconsistent', 'Failed', 'To: Creating', 'To: Attachable', 'To: Attaching']:
+ result['errors'].append(
+ 'Invalid state `{}` for volume `{}` at index {} for resource `{}` on node `{}`'
+ .format(disk_state, volume['device-path'], volume_index, resource_name, node_name)
+ )
+ continue
+ if disk_state == 'Diskless':
+ if resource['diskful']:
+ result['errors'].append(
+ 'Unintentional diskless state detected for volume `{}` at index {} for resource `{}` on node `{}`'
+ .format(volume['device-path'], volume_index, resource_name, node_name)
+ )
+ elif resource['tie-breaker']:
+ volume['disk-state'] = 'TieBreaker'
+ continue
+ result['warnings'].append(
+ 'Unhandled state `{}` for volume `{}` at index {} for resource `{}` on node `{}`'
+ .format(disk_state, volume['device-path'], volume_index, resource_name, node_name)
+ )
+
+ except Exception as e:
+ result['errors'].append('Unexpected error: `{}`'.format(e))
+
+ return format_result()
+
+
if __name__ == '__main__':
XenAPIPlugin.dispatch({
- 'enable': enable,
+ 'prepareSr': prepare_sr,
+ 'releaseSr': release_sr,
+ 'updateDrbdReactor': update_drbd_reactor,
'attach': attach,
'detach': detach,
+ 'destroy': destroy,
+
+ # vhdutil wrappers called by linstorvhdutil.
+ # Note: When a VHD is open in RO mode (so for all vhdutil getters),
+ # the LVM layer is used directly to bypass DRBD verifications.
+ # In this case there can't be EROFS errors.
+ # Note 2: We assume linstorvhdutil executes remote calls on diskful
+ # DRBDs, otherwise we still have EROFS errors...
'check': check,
'getVHDInfo': get_vhd_info,
'hasParent': has_parent,
@@ -268,5 +1066,22 @@ if __name__ == '__main__':
'getDepth': get_depth,
'getKeyHash': get_key_hash,
'getBlockBitmap': get_block_bitmap,
- 'lockVdi': lock_vdi
+
+ # Called by cleanup.py to coalesce when a primary
+ # is opened on a non-local host.
+ 'setParent': set_parent,
+ 'coalesce': coalesce,
+ 'repair': repair,
+
+ 'lockVdi': lock_vdi,
+ 'hasControllerRunning': has_controller_running,
+ 'addHost': add_host,
+ 'removeHost': remove_host,
+ 'createSr': create_sr,
+ 'listDrbdVolumes': list_drbd_volumes,
+ 'demoteDrbdResource': demote_drbd_resource,
+ 'destroyDrbdVolume': destroy_drbd_volume,
+ 'destroyDrbdVolumes': destroy_drbd_volumes,
+ 'getDrbdOpeners': get_drbd_openers,
+ 'healthCheck': health_check
})
diff --git a/drivers/linstorjournaler.py b/drivers/linstorjournaler.py
index bc7cff7c2..a61d9f11b 100755
--- a/drivers/linstorjournaler.py
+++ b/drivers/linstorjournaler.py
@@ -16,7 +16,8 @@
#
-from linstorvolumemanager import LinstorVolumeManager
+from linstorvolumemanager import \
+ get_controller_uri, LinstorVolumeManager, LinstorVolumeManagerError
import linstor
import re
import util
@@ -52,20 +53,10 @@ def __init__(self, uri, group_name, logger=default_logger.__func__):
self._namespace = '{}journal/'.format(
LinstorVolumeManager._build_sr_namespace()
)
-
- def connect():
- self._journal = linstor.KV(
- LinstorVolumeManager._build_group_name(group_name),
- uri=uri,
- namespace=self._namespace
- )
-
- util.retry(
- connect,
- maxretry=60,
- exceptions=[linstor.errors.LinstorNetworkError]
- )
self._logger = logger
+ self._journal = self._create_journal_instance(
+ uri, group_name, self._namespace
+ )
def create(self, type, identifier, value):
# TODO: Maybe rename to 'add' in the future (in Citrix code too).
@@ -116,6 +107,7 @@ def remove(self, type, identifier):
)
def get(self, type, identifier):
+ self._reset_namespace()
return self._journal.get(self._get_key(type, identifier))
def get_all(self, type):
@@ -150,6 +142,34 @@ def hasJournals(self, identifier):
def _reset_namespace(self):
self._journal.namespace = self._namespace
+ @classmethod
+ def _create_journal_instance(cls, uri, group_name, namespace):
+ def connect(uri):
+ if not uri:
+ uri = get_controller_uri()
+ if not uri:
+ raise LinstorVolumeManagerError(
+ 'Unable to find controller uri...'
+ )
+ return linstor.KV(
+ LinstorVolumeManager._build_group_name(group_name),
+ uri=uri,
+ namespace=namespace
+ )
+
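+        # First try the given URI; if it is empty or the controller has
+        # moved, re-resolve the controller URI and retry a few times.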
+ try:
+ return connect(uri)
+ except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
+ pass
+
+ return util.retry(
+ lambda: connect(None),
+ maxretry=10,
+ exceptions=[
+ linstor.errors.LinstorNetworkError, LinstorVolumeManagerError
+ ]
+ )
+
@staticmethod
def _get_key(type, identifier):
return '{}/{}'.format(type, identifier)
diff --git a/drivers/linstorvhdutil.py b/drivers/linstorvhdutil.py
index 7a1356627..83d7f8be5 100644
--- a/drivers/linstorvhdutil.py
+++ b/drivers/linstorvhdutil.py
@@ -25,9 +25,46 @@
MANAGER_PLUGIN = 'linstor-manager'
+# EMEDIUMTYPE constant (124) is not available in python2.
+EMEDIUMTYPE = 124
+
+
+def call_vhd_util_on_host(session, host_ref, method, device_path, args):
+ try:
+ response = session.xenapi.host.call_plugin(
+ host_ref, MANAGER_PLUGIN, method, args
+ )
+ except Exception as e:
+ util.SMlog('call-plugin ({} with {}) exception: {}'.format(
+ method, args, e
+ ))
+ raise
+
+ util.SMlog('call-plugin ({} with {}) returned: {}'.format(
+ method, args, response
+ ))
+
+ return response
+
+
+class LinstorCallException(Exception):
+ def __init__(self, cmd_err):
+ self.cmd_err = cmd_err
+
+ def __str__(self):
+ return str(self.cmd_err)
+
+
+class ErofsLinstorCallException(LinstorCallException):
+ pass
+
+
+class NoPathLinstorCallException(LinstorCallException):
+ pass
+
def linstorhostcall(local_method, remote_method):
- def decorated(func):
+ def decorated(response_parser):
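+        # The decorated function is now just a response parser: the wrapper
+        # below does the local call and falls back to a remote plugin call.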
def wrapper(*args, **kwargs):
self = args[0]
vdi_uuid = args[1]
@@ -41,50 +78,56 @@ def wrapper(*args, **kwargs):
# Try to read locally if the device is not in use or if the device
# is up to date and not diskless.
- (node_names, in_use) = \
- self._linstor.find_up_to_date_diskfull_nodes(vdi_uuid)
+ (node_names, in_use_by) = \
+ self._linstor.find_up_to_date_diskful_nodes(vdi_uuid)
+ local_e = None
try:
- if not in_use or socket.gethostname() in node_names:
- return local_method(device_path, *args[2:], **kwargs)
- except util.CommandException as e:
- # EMEDIUMTYPE constant (124) is not available in python2.
- if e.code != errno.EROFS and e.code != 124:
- raise
+ if not in_use_by or socket.gethostname() in node_names:
+ return self._call_local_vhd_util(local_method, device_path, *args[2:], **kwargs)
+ except ErofsLinstorCallException as e:
+ local_e = e.cmd_err
+ except Exception as e:
+ local_e = e
+
+ util.SMlog(
+ 'unable to execute `{}` locally, retry using a readable host... (cause: {})'.format(
+ remote_method, local_e if local_e else 'local diskless + in use or not up to date'
+ )
+ )
+
+ if in_use_by:
+ node_names = {in_use_by}
# B. Execute the plugin on master or slave.
- def exec_remote_method():
- host_ref = self._get_readonly_host(
- vdi_uuid, device_path, node_names
- )
- args = {
- 'devicePath': device_path,
- 'groupName': self._linstor.group_name
- }
- args.update(**kwargs)
+ remote_args = {
+ 'devicePath': device_path,
+ 'groupName': self._linstor.group_name
+ }
+ remote_args.update(**kwargs)
+ remote_args = {str(key): str(value) for key, value in remote_args.iteritems()}
- try:
- response = self._session.xenapi.host.call_plugin(
- host_ref, MANAGER_PLUGIN, remote_method, args
- )
- except Exception as e:
- util.SMlog('call-plugin ({} with {}) exception: {}'.format(
- remote_method, args, e
- ))
- raise
-
- util.SMlog('call-plugin ({} with {}) returned: {}'.format(
- remote_method, args, response
- ))
- if response == 'False':
- raise xs_errors.XenError(
- 'VDIUnavailable',
- opterr='Plugin {} failed'.format(MANAGER_PLUGIN)
- )
- kwargs['response'] = response
-
- util.retry(exec_remote_method, 5, 3)
- return func(*args, **kwargs)
+ try:
+ def remote_call():
+ host_ref = self._get_readonly_host(vdi_uuid, device_path, node_names)
+ return call_vhd_util_on_host(self._session, host_ref, remote_method, device_path, remote_args)
+ response = util.retry(remote_call, 5, 2)
+ except Exception as remote_e:
+ self._raise_openers_exception(device_path, local_e or remote_e)
+
+ return response_parser(self, vdi_uuid, response)
+ return wrapper
+ return decorated
+
+
+def linstormodifier():
+ def decorated(func):
+ def wrapper(*args, **kwargs):
+ self = args[0]
+
+ ret = func(*args, **kwargs)
+ self._linstor.invalidate_resource_cache()
+ return ret
return wrapper
return decorated
@@ -94,17 +137,33 @@ def __init__(self, session, linstor):
self._session = session
self._linstor = linstor
+ # --------------------------------------------------------------------------
+ # Getters: read locally and try on another host in case of failure.
+ # --------------------------------------------------------------------------
+
+ def check(self, vdi_uuid, ignore_missing_footer=False, fast=False):
+ kwargs = {
+ 'ignoreMissingFooter': ignore_missing_footer,
+ 'fast': fast
+ }
+ return self._check(vdi_uuid, **kwargs) # pylint: disable = E1123
+
@linstorhostcall(vhdutil.check, 'check')
- def check(self, vdi_uuid, **kwargs):
- return distutils.util.strtobool(kwargs['response'])
+ def _check(self, vdi_uuid, response):
+ return distutils.util.strtobool(response)
def get_vhd_info(self, vdi_uuid, include_parent=True):
- kwargs = {'includeParent': str(include_parent)}
- return self._get_vhd_info(vdi_uuid, self._extract_uuid, **kwargs)
+ kwargs = {
+ 'includeParent': include_parent,
+ 'resolveParent': False
+ }
+ # TODO: Replace pylint comment with this feature when possible:
+ # https://github.com/PyCQA/pylint/pull/2926
+ return self._get_vhd_info(vdi_uuid, self._extract_uuid, **kwargs) # pylint: disable = E1123
@linstorhostcall(vhdutil.getVHDInfo, 'getVHDInfo')
- def _get_vhd_info(self, vdi_uuid, *args, **kwargs):
- obj = json.loads(kwargs['response'])
+ def _get_vhd_info(self, vdi_uuid, response):
+ obj = json.loads(response)
vhd_info = vhdutil.VHDInfo(vdi_uuid)
vhd_info.sizeVirt = obj['sizeVirt']
@@ -118,35 +177,91 @@ def _get_vhd_info(self, vdi_uuid, *args, **kwargs):
return vhd_info
@linstorhostcall(vhdutil.hasParent, 'hasParent')
- def has_parent(self, vdi_uuid, **kwargs):
- return distutils.util.strtobool(kwargs['response'])
+ def has_parent(self, vdi_uuid, response):
+ return distutils.util.strtobool(response)
def get_parent(self, vdi_uuid):
return self._get_parent(vdi_uuid, self._extract_uuid)
@linstorhostcall(vhdutil.getParent, 'getParent')
- def _get_parent(self, vdi_uuid, *args, **kwargs):
- return kwargs['response']
+ def _get_parent(self, vdi_uuid, response):
+ return response
@linstorhostcall(vhdutil.getSizeVirt, 'getSizeVirt')
- def get_size_virt(self, vdi_uuid, **kwargs):
- return int(kwargs['response'])
+ def get_size_virt(self, vdi_uuid, response):
+ return int(response)
@linstorhostcall(vhdutil.getSizePhys, 'getSizePhys')
- def get_size_phys(self, vdi_uuid, **kwargs):
- return int(kwargs['response'])
+ def get_size_phys(self, vdi_uuid, response):
+ return int(response)
@linstorhostcall(vhdutil.getDepth, 'getDepth')
- def get_depth(self, vdi_uuid, **kwargs):
- return int(kwargs['response'])
+ def get_depth(self, vdi_uuid, response):
+ return int(response)
@linstorhostcall(vhdutil.getKeyHash, 'getKeyHash')
- def get_key_hash(self, vdi_uuid, **kwargs):
- return kwargs['response'] or None
+ def get_key_hash(self, vdi_uuid, response):
+ return response or None
@linstorhostcall(vhdutil.getBlockBitmap, 'getBlockBitmap')
- def get_block_bitmap(self, vdi_uuid, **kwargs):
- return base64.b64decode(kwargs['response'])
+ def get_block_bitmap(self, vdi_uuid, response):
+ return base64.b64decode(response)
+
+ # --------------------------------------------------------------------------
+ # Setters: only used locally.
+ # --------------------------------------------------------------------------
+
+ @linstormodifier()
+ def create(self, path, size, static, msize=0):
+ return self._call_local_vhd_util_or_fail(vhdutil.create, path, size, static, msize)
+
+ @linstormodifier()
+ def set_size_virt_fast(self, path, size):
+ return self._call_local_vhd_util_or_fail(vhdutil.setSizeVirtFast, path, size)
+
+ @linstormodifier()
+ def set_size_phys(self, path, size, debug=True):
+ return self._call_local_vhd_util_or_fail(vhdutil.setSizePhys, path, size, debug)
+
+ @linstormodifier()
+ def set_parent(self, path, parentPath, parentRaw=False):
+ return self._call_local_vhd_util_or_fail(vhdutil.setParent, path, parentPath, parentRaw)
+
+ @linstormodifier()
+ def set_hidden(self, path, hidden=True):
+ return self._call_local_vhd_util_or_fail(vhdutil.setHidden, path, hidden)
+
+ @linstormodifier()
+ def set_key(self, path, key_hash):
+ return self._call_local_vhd_util_or_fail(vhdutil.setKey, path, key_hash)
+
+ @linstormodifier()
+ def kill_data(self, path):
+ return self._call_local_vhd_util_or_fail(vhdutil.killData, path)
+
+ @linstormodifier()
+ def snapshot(self, path, parent, parentRaw, msize=0, checkEmpty=True):
+ return self._call_local_vhd_util_or_fail(vhdutil.snapshot, path, parent, parentRaw, msize, checkEmpty)
+
+ # --------------------------------------------------------------------------
+ # Remote setters: write locally and try on another host in case of failure.
+ # --------------------------------------------------------------------------
+
+ @linstormodifier()
+ def force_parent(self, path, parentPath, parentRaw=False):
+ kwargs = {
+ 'parentPath': str(parentPath),
+ 'parentRaw': parentRaw
+ }
+ return self._call_vhd_util(vhdutil.setParent, 'setParent', path, use_parent=False, **kwargs)
+
+ @linstormodifier()
+ def force_coalesce(self, path):
+ return self._call_vhd_util(vhdutil.coalesce, 'coalesce', path, use_parent=True)
+
+ @linstormodifier()
+ def force_repair(self, path):
+ return self._call_vhd_util(vhdutil.repair, 'repair', path, use_parent=False)
# --------------------------------------------------------------------------
# Helpers.
@@ -161,7 +276,7 @@ def _extract_uuid(self, device_path):
def _get_readonly_host(self, vdi_uuid, device_path, node_names):
"""
When vhd-util is called to fetch VDI info we must find a
- diskfull DRBD disk to read the data. It's the goal of this function.
+ diskful DRBD disk to read the data. It's the goal of this function.
Why? Because when a VHD is open in RO mode, the LVM layer is used
directly to bypass DRBD verifications (we can have only one process
that reads/writes to disk with DRBD devices).
@@ -170,7 +285,7 @@ def _get_readonly_host(self, vdi_uuid, device_path, node_names):
if not node_names:
raise xs_errors.XenError(
'VDIUnavailable',
- opterr='Unable to find diskfull node: {} (path={})'
+ opterr='Unable to find diskful node: {} (path={})'
.format(vdi_uuid, device_path)
)
@@ -184,3 +299,134 @@ def _get_readonly_host(self, vdi_uuid, device_path, node_names):
opterr='Unable to find a valid host from VDI: {} (path={})'
.format(vdi_uuid, device_path)
)
+
+ # --------------------------------------------------------------------------
+
+ def _raise_openers_exception(self, device_path, e):
+ if isinstance(e, util.CommandException):
+ e_str = 'cmd: `{}`, code: `{}`, reason: `{}`'.format(e.cmd, e.code, e.reason)
+ else:
+ e_str = str(e)
+
+ e_with_openers = None
+ try:
+ volume_uuid = self._linstor.get_volume_uuid_from_device_path(
+ device_path
+ )
+ e_wrapper = Exception(
+ e_str + ' (openers: {})'.format(
+ self._linstor.get_volume_openers(volume_uuid)
+ )
+ )
+ except Exception as illformed_e:
+ e_wrapper = Exception(
+ e_str + ' (unable to get openers: {})'.format(illformed_e)
+ )
+ util.SMlog('raise opener exception: {}'.format(e_wrapper))
+ raise e_wrapper # pylint: disable = E0702
+
+ def _call_local_vhd_util(self, local_method, device_path, *args, **kwargs):
+ try:
+ def local_call():
+ try:
+ return local_method(device_path, *args, **kwargs)
+ except util.CommandException as e:
+ if e.code == errno.EROFS or e.code == EMEDIUMTYPE:
+ raise ErofsLinstorCallException(e) # Break retry calls.
+ if e.code == errno.ENOENT:
+ raise NoPathLinstorCallException(e)
+ raise e
+ # Retry only locally if it's not an EROFS exception.
+ return util.retry(local_call, 5, 2, exceptions=[util.CommandException])
+ except util.CommandException as e:
+ util.SMlog('failed to execute locally vhd-util (sys {})'.format(e.code))
+ raise e
+
+ def _call_local_vhd_util_or_fail(self, local_method, device_path, *args, **kwargs):
+ try:
+ return self._call_local_vhd_util(local_method, device_path, *args, **kwargs)
+ except ErofsLinstorCallException as e:
+ # Volume is locked on a host, find openers.
+ self._raise_openers_exception(device_path, e.cmd_err)
+
+ def _call_vhd_util(self, local_method, remote_method, device_path, use_parent, *args, **kwargs):
+        # Note: `use_parent` tells whether the VHD parent is used by the local/remote method.
+        # Normally, in case of failure, if the parent is unused we try to execute the method on
+        # another host using the DRBD opener list. Otherwise, if the parent is required,
+        # we must check where the parent is open instead of the child.
+
+ # A. Try to write locally...
+ try:
+ return self._call_local_vhd_util(local_method, device_path, *args, **kwargs)
+ except Exception:
+ pass
+
+ util.SMlog('unable to execute `{}` locally, retry using a writable host...'.format(remote_method))
+
+ # B. Execute the command on another host.
+ # B.1. Get host list.
+ try:
+ hosts = self._session.xenapi.host.get_all_records()
+ except Exception as e:
+ raise xs_errors.XenError(
+ 'VDIUnavailable',
+ opterr='Unable to get host list to run vhd-util command `{}` (path={}): {}'
+ .format(remote_method, device_path, e)
+ )
+
+ # B.2. Prepare remote args.
+ remote_args = {
+ 'devicePath': device_path,
+ 'groupName': self._linstor.group_name
+ }
+ remote_args.update(**kwargs)
+ remote_args = {str(key): str(value) for key, value in remote_args.iteritems()}
+
+ volume_uuid = self._linstor.get_volume_uuid_from_device_path(
+ device_path
+ )
+ parent_volume_uuid = None
+ if use_parent:
+ parent_volume_uuid = self.get_parent(volume_uuid)
+
+ openers_uuid = parent_volume_uuid if use_parent else volume_uuid
+
+ # B.3. Call!
+ def remote_call():
+ try:
+ all_openers = self._linstor.get_volume_openers(openers_uuid)
+ except Exception as e:
+ raise xs_errors.XenError(
+ 'VDIUnavailable',
+ opterr='Unable to get DRBD openers to run vhd-util command `{}` (path={}): {}'
+ .format(remote_method, device_path, e)
+ )
+
+ no_host_found = True
+ for hostname, openers in all_openers.iteritems():
+ if not openers:
+ continue
+
+ try:
+ host_ref = next(ref for ref, rec in hosts.iteritems() if rec['hostname'] == hostname)
+ except StopIteration:
+ continue
+
+ no_host_found = False
+ try:
+ return call_vhd_util_on_host(self._session, host_ref, remote_method, device_path, remote_args)
+ except Exception:
+ pass
+
+ if no_host_found:
+ try:
+ return local_method(device_path, *args, **kwargs)
+ except Exception as e:
+ self._raise_openers_exception(device_path, e)
+
+ raise xs_errors.XenError(
+ 'VDIUnavailable',
+ opterr='No valid host found to run vhd-util command `{}` (path=`{}`, openers=`{}`): {}'
+ .format(remote_method, device_path, openers, e)
+ )
+ return util.retry(remote_call, 5, 2)
diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py
index 182b88992..a9f39e03c 100755
--- a/drivers/linstorvolumemanager.py
+++ b/drivers/linstorvolumemanager.py
@@ -16,15 +16,103 @@
#
+import distutils.util
+import errno
+import glob
import json
import linstor
import os.path
import re
+import shutil
import socket
+import stat
import time
import util
+import uuid
+# Contains the data of the "/var/lib/linstor" directory.
+DATABASE_VOLUME_NAME = 'xcp-persistent-database'
+DATABASE_SIZE = 1 << 30 # 1GB.
+DATABASE_PATH = '/var/lib/linstor'
+DATABASE_MKFS = 'mkfs.ext4'
+
+REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")
+REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')
+
+DRBD_BY_RES_PATH = '/dev/drbd/by-res/'
+
+PLUGIN = 'linstor-manager'
+
+
+# ==============================================================================
+
+def get_local_volume_openers(resource_name, volume):
+ if not resource_name or volume is None:
+ raise Exception('Cannot get DRBD openers without resource name and/or volume.')
+
+ path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
+ resource_name, volume
+ )
+
+ with open(path, 'r') as openers:
+ # Not a big cost, so read all lines directly.
+ lines = openers.readlines()
+
+ result = {}
+
+ opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')
+ for line in lines:
+ match = opener_re.match(line)
+ assert match
+
+ groups = match.groups()
+ process_name = groups[0]
+ pid = groups[1]
+ open_duration_ms = groups[2]
+ result[pid] = {
+ 'process-name': process_name,
+ 'open-duration': open_duration_ms
+ }
+
+ return json.dumps(result)
+
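The openers file exposed by DRBD's debugfs is assumed to hold one opener per line: a process name, a PID and an open duration in milliseconds. A standalone sketch of the same parsing on made-up content (the sample data is hypothetical):

import json
import re

# Hypothetical content of
# /sys/kernel/debug/drbd/resources/<resource>/volumes/0/openers
sample = 'tapdisk 4210 1523\nqemu-dm 4388 87\n'

opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')
result = {}
for line in sample.splitlines():
    match = opener_re.match(line)
    if match:
        process_name, pid, open_duration_ms = match.groups()
        result[pid] = {
            'process-name': process_name,
            'open-duration': open_duration_ms
        }

print(json.dumps(result))
# -> {"4210": {"process-name": "tapdisk", "open-duration": "1523"}, ...}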
+def get_all_volume_openers(resource_name, volume):
+ PLUGIN_CMD = 'getDrbdOpeners'
+
+ volume = str(volume)
+ openers = {}
+
+    # Make sure this call never gets stuck: this function can be called
+    # during HA init and in that case we could wait forever.
+ session = util.timeout_call(10, util.get_localAPI_session)
+
+ hosts = session.xenapi.host.get_all_records()
+ for host_ref, host_record in hosts.items():
+ node_name = host_record['hostname']
+ try:
+ if not session.xenapi.host_metrics.get_record(
+ host_record['metrics']
+ )['live']:
+ # Ensure we call plugin on online hosts only.
+ continue
+
+ openers[node_name] = json.loads(
+ session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {
+ 'resourceName': resource_name,
+ 'volume': volume
+ })
+ )
+ except Exception as e:
+ util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
+ resource_name, node_name, e
+ ))
+
+ return openers
+
+
+# ==============================================================================
+
def round_up(value, divisor):
assert divisor
divisor = int(divisor)
@@ -37,6 +125,148 @@ def round_down(value, divisor):
return value - (value % int(divisor))
+# ==============================================================================
+
+def get_remote_host_ip(node_name):
+ (ret, stdout, stderr) = util.doexec([
+ 'drbdsetup', 'show', DATABASE_VOLUME_NAME, '--json'
+ ])
+ if ret != 0:
+ return
+
+ try:
+ conf = json.loads(stdout)
+ if not conf:
+ return
+
+ for connection in conf[0]['connections']:
+ if connection['net']['_name'] == node_name:
+ value = connection['path']['_remote_host']
+ res = REG_DRBDSETUP_IP.match(value)
+ if res:
+ return res.groups()[0]
+ break
+ except Exception:
+ pass
+
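The connection entries returned by `drbdsetup show <resource> --json` are assumed to look roughly like the trimmed snippet below; the regex keeps only the address part of `_remote_host`. Hostnames and IPs here are purely illustrative:

import json
import re

REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')

conf = json.loads("""[{
    "connections": [{
        "net": {"_name": "host-2"},
        "path": {"_remote_host": "ipv4 172.16.210.12:7000"}
    }]
}]""")

for connection in conf[0]['connections']:
    if connection['net']['_name'] == 'host-2':
        value = connection['path']['_remote_host']
        res = REG_DRBDSETUP_IP.match(value)
        if res:
            print(res.groups()[0])  # -> 172.16.210.12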
+
+def _get_controller_uri():
+ PLUGIN_CMD = 'hasControllerRunning'
+
+ # Try to find controller using drbdadm.
+ (ret, stdout, stderr) = util.doexec([
+ 'drbdadm', 'status', DATABASE_VOLUME_NAME
+ ])
+ if ret == 0:
+ # If we are here, the database device exists locally.
+
+ if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
+ # Nice case, we have the controller running on this local host.
+ return 'linstor://localhost'
+
+ # Try to find the host using DRBD connections.
+ res = REG_DRBDADM_PRIMARY.search(stdout)
+ if res:
+ node_name = res.groups()[0]
+ ip = get_remote_host_ip(node_name)
+ if ip:
+ return 'linstor://' + ip
+
+    # Worst case: there are many hosts in the pool (>= 4), so we can't find
+    # the primary using drbdadm because we don't see all connections to the
+    # replicated volume. `drbdadm status xcp-persistent-database` only
+    # returns 3 connections by default.
+ try:
+ session = util.timeout_call(10, util.get_localAPI_session)
+
+ for host_ref, host_record in session.xenapi.host.get_all_records().items():
+ node_name = host_record['hostname']
+ try:
+ if distutils.util.strtobool(
+ session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {})
+ ):
+ return 'linstor://' + host_record['address']
+ except Exception as e:
+                # Can throw an exception if a host is offline, so catch it.
+ util.SMlog('Unable to search controller on `{}`: {}'.format(
+ node_name, e
+ ))
+ except:
+ # Not found, maybe we are trying to create the SR...
+ pass
+
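`_get_controller_uri` relies on `drbdadm status` printing the remote primary as `<node> role:Primary` when the local copy is secondary; REG_DRBDADM_PRIMARY extracts that node name. A quick standalone check on made-up output:

import re

REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")

# Hypothetical `drbdadm status xcp-persistent-database` output as seen from a
# host where the controller runs elsewhere.
sample = (
    'xcp-persistent-database role:Secondary\n'
    '  disk:UpToDate\n'
    '  host-1 role:Primary\n'
    '    peer-disk:UpToDate\n'
)

res = REG_DRBDADM_PRIMARY.search(sample)
print(res.groups()[0] if res else None)  # -> host-1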
+def get_controller_uri():
+ retries = 0
+ while True:
+ uri = _get_controller_uri()
+ if uri:
+ return uri
+
+ retries += 1
+ if retries >= 10:
+ break
+ time.sleep(1)
+
+
+def get_controller_node_name():
+ PLUGIN_CMD = 'hasControllerRunning'
+
+ (ret, stdout, stderr) = util.doexec([
+ 'drbdadm', 'status', DATABASE_VOLUME_NAME
+ ])
+
+ if ret == 0:
+ if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
+ return 'localhost'
+
+ res = REG_DRBDADM_PRIMARY.search(stdout)
+ if res:
+ return res.groups()[0]
+
+ session = util.timeout_call(5, util.get_localAPI_session)
+
+ for host_ref, host_record in session.xenapi.host.get_all_records().items():
+ node_name = host_record['hostname']
+ try:
+ if not session.xenapi.host_metrics.get_record(
+ host_record['metrics']
+ )['live']:
+ continue
+
+ if distutils.util.strtobool(session.xenapi.host.call_plugin(
+ host_ref, PLUGIN, PLUGIN_CMD, {}
+ )):
+ return node_name
+ except Exception as e:
+ util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
+ node_name, e
+ ))
+
+
+def demote_drbd_resource(node_name, resource_name):
+ PLUGIN_CMD = 'demoteDrbdResource'
+
+ session = util.timeout_call(5, util.get_localAPI_session)
+
+ for host_ref, host_record in session.xenapi.host.get_all_records().items():
+ if host_record['hostname'] != node_name:
+ continue
+
+ try:
+ session.xenapi.host.call_plugin(
+ host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
+ )
+ except Exception as e:
+ util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
+ resource_name, node_name, e
+ ))
+ raise Exception(
+ 'Can\'t demote resource `{}`, unable to find node `{}`'
+ .format(resource_name, node_name)
+ )
+
+# ==============================================================================
+
class LinstorVolumeManagerError(Exception):
ERR_GENERIC = 0,
ERR_VOLUME_EXISTS = 1,
@@ -50,6 +280,7 @@ def __init__(self, message, code=ERR_GENERIC):
def code(self):
return self._code
+
# ==============================================================================
# Note:
@@ -63,7 +294,17 @@ class LinstorVolumeManager(object):
A volume in this context is a physical part of the storage layer.
"""
- DEV_ROOT_PATH = '/dev/drbd/by-res/'
+ __slots__ = (
+ '_linstor', '_logger',
+ '_uri', '_base_group_name',
+ '_redundancy', '_group_name',
+ '_volumes', '_storage_pools',
+ '_storage_pools_time',
+ '_kv_cache', '_resource_cache', '_volume_info_cache',
+ '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty'
+ )
+
+ DEV_ROOT_PATH = DRBD_BY_RES_PATH
# Default LVM extent size.
BLOCK_SIZE = 4 * 1024 * 1024
@@ -90,7 +331,7 @@ class LinstorVolumeManager(object):
# Property namespaces.
NAMESPACE_SR = 'xcp/sr'
- NAMESPACE_VOLUME = 'volume'
+ NAMESPACE_VOLUME = 'xcp/volume'
# Regex to match properties.
REG_PROP = '^([^/]+)/{}$'
@@ -106,6 +347,10 @@ class LinstorVolumeManager(object):
PREFIX_SR = 'xcp-sr-'
PREFIX_VOLUME = 'xcp-volume-'
+    # Limit the number of requests when storage pool info is asked for:
+    # we only refetch the current pool status after N elapsed seconds.
+ STORAGE_POOLS_FETCH_INTERVAL = 15
+
@staticmethod
def default_logger(*args):
print(args)
@@ -117,38 +362,43 @@ def default_logger(*args):
class VolumeInfo(object):
__slots__ = (
'name',
- 'physical_size', # Total physical size used by this volume on
- # all disks.
- 'virtual_size' # Total virtual available size of this volume
- # (i.e. the user size at creation).
+ 'allocated_size', # Allocated size, place count is not used.
+ 'virtual_size', # Total virtual available size of this volume
+ # (i.e. the user size at creation).
+ 'diskful' # Array of nodes that have a diskful volume.
)
def __init__(self, name):
self.name = name
- self.physical_size = 0
+ self.allocated_size = 0
self.virtual_size = 0
+ self.diskful = []
def __repr__(self):
- return 'VolumeInfo("{}", {}, {})'.format(
- self.name, self.physical_size, self.virtual_size
+ return 'VolumeInfo("{}", {}, {}, {})'.format(
+ self.name, self.allocated_size, self.virtual_size,
+ self.diskful
)
# --------------------------------------------------------------------------
def __init__(
- self, uri, group_name, repair=False, logger=default_logger.__func__
+ self, uri, group_name, repair=False, logger=default_logger.__func__,
+ attempt_count=30
):
"""
- Create a new LinstorApi object.
+ Create a new LinstorVolumeManager object.
:param str uri: URI to communicate with the LINSTOR controller.
:param str group_name: The SR goup name to use.
:param bool repair: If true we try to remove bad volumes due to a crash
or unexpected behavior.
:param function logger: Function to log messages.
+ :param int attempt_count: Number of attempts to join the controller.
"""
- self._uri = uri
- self._linstor = self._create_linstor_instance(uri)
+ self._linstor = self._create_linstor_instance(
+ uri, attempt_count=attempt_count
+ )
self._base_group_name = group_name
# Ensure group exists.
@@ -164,6 +414,16 @@ def __init__(
self._logger = logger
self._redundancy = groups[0].select_filter.place_count
self._group_name = group_name
+ self._volumes = set()
+ self._storage_pools_time = 0
+
+        # To increase performance and limit the request count to LINSTOR services,
+ # we use caches.
+ self._kv_cache = self._create_kv_cache()
+ self._resource_cache = None
+ self._resource_cache_dirty = True
+ self._volume_info_cache = None
+ self._volume_info_cache_dirty = True
self._build_volumes(repair=repair)
@property
@@ -175,6 +435,15 @@ def group_name(self):
"""
return self._base_group_name
+ @property
+ def redundancy(self):
+ """
+ Give the used redundancy.
+ :return: The redundancy.
+ :rtype: int
+ """
+ return self._redundancy
+
@property
def volumes(self):
"""
@@ -184,66 +453,6 @@ def volumes(self):
"""
return self._volumes
- @property
- def volumes_with_name(self):
- """
- Give a volume dictionnary that contains names actually owned.
- :return: A volume/name dict.
- :rtype: dict(str, str)
- """
- return self._get_volumes_by_property(self.REG_VOLUME_NAME)
-
- @property
- def volumes_with_info(self):
- """
- Give a volume dictionnary that contains VolumeInfos.
- :return: A volume/VolumeInfo dict.
- :rtype: dict(str, VolumeInfo)
- """
-
- volumes = {}
-
- all_volume_info = self._get_volumes_info()
- volume_names = self.volumes_with_name
- for volume_uuid, volume_name in volume_names.items():
- if volume_name:
- volume_info = all_volume_info.get(volume_name)
- if volume_info:
- volumes[volume_uuid] = volume_info
- continue
-
- # Well I suppose if this volume is not available,
- # LINSTOR has been used directly without using this API.
- volumes[volume_uuid] = self.VolumeInfo('')
-
- return volumes
-
- @property
- def volumes_with_metadata(self):
- """
- Give a volume dictionnary that contains metadata.
- :return: A volume/metadata dict.
- :rtype: dict(str, dict)
- """
-
- volumes = {}
-
- metadata = self._get_volumes_by_property(self.REG_METADATA)
- for volume_uuid, volume_metadata in metadata.items():
- if volume_metadata:
- volume_metadata = json.loads(volume_metadata)
- if isinstance(volume_metadata, dict):
- volumes[volume_uuid] = volume_metadata
- continue
- raise LinstorVolumeManagerError(
- 'Expected dictionary in volume metadata: {}'
- .format(volume_uuid)
- )
-
- volumes[volume_uuid] = {}
-
- return volumes
-
@property
def max_volume_size_allowed(self):
"""
@@ -284,26 +493,67 @@ def physical_free_size(self):
return self._compute_size('free_capacity')
@property
- def total_allocated_volume_size(self):
+ def allocated_volume_size(self):
"""
- Give the sum of all created volumes.
- :return: The physical required size to use the volumes.
+        Give the allocated size of all volumes. The place count is not
+        used here. With thick LVM, the allocated size of a volume should be
+        equal to its virtual size. With thin LVM, the allocated size is
+        equal to or lower than the virtual size.
+ :return: The allocated size of all volumes.
:rtype: int
"""
- size = 0
- for resource in self._linstor.resource_list_raise().resources:
+ # Paths: /res_name/vol_number/size
+ sizes = {}
+
+ for resource in self._get_resource_cache().resources:
+ if resource.name not in sizes:
+ current = sizes[resource.name] = {}
+ else:
+ current = sizes[resource.name]
+
for volume in resource.volumes:
# We ignore diskless pools of the form "DfltDisklessStorPool".
- if volume.storage_pool_name == self._group_name:
- current_size = volume.usable_size
- if current_size < 0:
- raise LinstorVolumeManagerError(
- 'Failed to get usable size of `{}` on `{}`'
- .format(resource.name, volume.storage_pool_name)
- )
- size += current_size
- return size * 1024
+ if volume.storage_pool_name != self._group_name:
+ continue
+
+ current_size = volume.allocated_size
+ if current_size < 0:
+ raise LinstorVolumeManagerError(
+ 'Failed to get allocated size of `{}` on `{}`'
+ .format(resource.name, volume.storage_pool_name)
+ )
+ current[volume.number] = max(current_size, current.get(volume.number) or 0)
+
+ total_size = 0
+        for volumes in sizes.values():
+            for size in volumes.values():
+ total_size += size
+
+ return total_size * 1024
+
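Because each resource may be replicated on several hosts, only the largest reported allocation per (resource, volume number) pair is kept before summing, so the redundancy does not multiply the result. A standalone sketch of that aggregation on made-up numbers:

# Hypothetical LINSTOR reports: (resource, volume number, node, allocated KiB).
reports = [
    ('xcp-volume-a', 0, 'host-1', 2048),
    ('xcp-volume-a', 0, 'host-2', 1024),  # Thin copy, less allocated.
    ('xcp-volume-b', 0, 'host-1', 4096),
]

sizes = {}
for res_name, vol_number, _node, allocated in reports:
    current = sizes.setdefault(res_name, {})
    # Keep the biggest allocation seen for this volume number.
    current[vol_number] = max(allocated, current.get(vol_number) or 0)

total_size = sum(s for volumes in sizes.values() for s in volumes.values())
print(total_size * 1024)  # -> (2048 + 4096) * 1024 bytes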
+ def get_min_physical_size(self):
+ """
+        Give the minimum physical size of the SR,
+        i.e. the size of the smallest storage pool, and the pool count.
+        :return: A (pool count, smallest pool size) tuple.
+ :rtype: tuple(int, int)
+ """
+ size = None
+ pool_count = 0
+ for pool in self._get_storage_pools(force=True):
+ space = pool.free_space
+ if space:
+ pool_count += 1
+ current_size = space.total_capacity
+ if current_size < 0:
+ raise LinstorVolumeManagerError(
+ 'Failed to get pool total_capacity attr of `{}`'
+ .format(pool.node_name)
+ )
+ if size is None or current_size < size:
+ size = current_size
+ return (pool_count, (size or 0) * 1024)
@property
def metadata(self):
@@ -346,12 +596,8 @@ def disconnected_hosts(self):
:rtype: set(str)
"""
- pools = self._linstor.storage_pool_list_raise(
- filter_by_stor_pools=[self._group_name]
- ).storage_pools
-
disconnected_hosts = set()
- for pool in pools:
+ for pool in self._get_storage_pools():
for report in pool.reports:
if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \
linstor.consts.WARN_NOT_CONNECTED:
@@ -367,23 +613,33 @@ def check_volume_exists(self, volume_uuid):
"""
return volume_uuid in self._volumes
- def create_volume(self, volume_uuid, size, persistent=True):
+ def create_volume(
+ self, volume_uuid, size, persistent=True, volume_name=None,
+ no_diskless=False
+ ):
"""
Create a new volume on the SR.
:param str volume_uuid: The volume uuid to use.
:param int size: volume size in B.
:param bool persistent: If false the volume will be unavailable
on the next constructor call LinstorSR(...).
+ :param str volume_name: If set, this name is used in the LINSTOR
+ database instead of a generated name.
+ :param bool no_diskless: If set, the default group redundancy is not
+        used; instead, the volume is created diskful on all nodes.
:return: The current device path of the volume.
:rtype: str
"""
self._logger('Creating LINSTOR volume {}...'.format(volume_uuid))
- volume_name = self.build_volume_name(util.gen_uuid())
+ if not volume_name:
+ volume_name = self.build_volume_name(util.gen_uuid())
volume_properties = self._create_volume_with_properties(
- volume_uuid, volume_name, size, place_resources=True
+ volume_uuid, volume_name, size, place_resources=True,
+ no_diskless=no_diskless
)
+ # Volume created! Now try to find the device path.
try:
self._logger(
'Find device path of LINSTOR volume {}...'.format(volume_uuid)
@@ -396,8 +652,10 @@ def create_volume(self, volume_uuid, size, persistent=True):
'LINSTOR volume {} created!'.format(volume_uuid)
)
return device_path
- except Exception:
- self._force_destroy_volume(volume_uuid, volume_properties)
+ except Exception as e:
+ # There is an issue to find the path.
+ # At this point the volume has just been created, so force flag can be used.
+ self._destroy_volume(volume_uuid, force=True)
raise
def mark_volume_as_persistent(self, volume_uuid):
@@ -426,7 +684,7 @@ def destroy_volume(self, volume_uuid):
volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
self._volumes.remove(volume_uuid)
- self._destroy_volume(volume_uuid, volume_properties)
+ self._destroy_volume(volume_uuid)
def lock_volume(self, volume_uuid, locked=True):
"""
@@ -476,12 +734,15 @@ def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
waiting = False
+ volume_properties = self._get_kv_cache()
+
start = time.time()
while True:
# Can't delete in for loop, use a copy of the list.
remaining = checked.copy()
for volume_uuid in checked:
- volume_properties = self._get_volume_properties(volume_uuid)
+ volume_properties.namespace = \
+ self._build_volume_namespace(volume_uuid)
timestamp = volume_properties.get(
self.PROP_IS_READONLY_TIMESTAMP
)
@@ -519,6 +780,7 @@ def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
# We must wait to use the volume. After that we can modify it
# ONLY if the SR is locked to avoid bad reads on the slaves.
time.sleep(1)
+ volume_properties = self._create_kv_cache()
if waiting:
self._logger('No volume locked now!')
@@ -542,6 +804,9 @@ def resize_volume(self, volume_uuid, new_size):
volume_nr=0,
size=new_size // 1024
)
+
+ self._mark_resource_cache_as_dirty()
+
error_str = self._get_error_str(result)
if error_str:
raise LinstorVolumeManagerError(
@@ -587,6 +852,25 @@ def get_volume_size(self, volume_uuid):
)
return size * 1024
+
+ def set_auto_promote_timeout(self, volume_uuid, timeout):
+ """
+ Define the blocking time of open calls when a DRBD
+ is already open on another host.
+ :param str volume_uuid: The volume uuid to modify.
+ """
+
+ volume_name = self.get_volume_name(volume_uuid)
+ result = self._linstor.resource_dfn_modify(volume_name, {
+ 'DrbdOptions/Resource/auto-promote-timeout': timeout
+ })
+ error_str = self._get_error_str(result)
+ if error_str:
+ raise LinstorVolumeManagerError(
+ 'Could not change the auto promote timeout of `{}`: {}'
+ .format(volume_uuid, error_str)
+ )
+
def get_volume_info(self, volume_uuid):
"""
Get the volume info of a particular volume.
@@ -596,7 +880,7 @@ def get_volume_info(self, volume_uuid):
"""
volume_name = self.get_volume_name(volume_uuid)
- return self._get_volumes_info(filter=[volume_name])[volume_name]
+ return self._get_volumes_info()[volume_name]
def get_device_path(self, volume_uuid):
"""
@@ -620,7 +904,7 @@ def get_volume_uuid_from_device_path(self, device_path):
expected_volume_name = \
self.get_volume_name_from_device_path(device_path)
- volume_names = self.volumes_with_name
+ volume_names = self.get_volumes_with_name()
for volume_uuid, volume_name in volume_names.items():
if volume_name == expected_volume_name:
return volume_uuid
@@ -631,26 +915,24 @@ def get_volume_uuid_from_device_path(self, device_path):
def get_volume_name_from_device_path(self, device_path):
"""
- Get the volume name of a device_path on the current host.
+ Get the volume name of a device_path.
:param str device_path: The dev path to find the volume name.
- :return: The volume name of the local device path.
+ :return: The volume name of the device path.
:rtype: str
"""
- node_name = socket.gethostname()
- resources = self._linstor.resource_list_raise(
- filter_by_nodes=[node_name]
- ).resources
-
- real_device_path = os.path.realpath(device_path)
- for resource in resources:
- if resource.volumes[0].device_path == real_device_path:
- return resource.name
+ # Assume that we have a path like this:
+ # - "/dev/drbd/by-res/xcp-volume-/0"
+ # - "../xcp-volume-/0"
+ if device_path.startswith(DRBD_BY_RES_PATH):
+ prefix_len = len(DRBD_BY_RES_PATH)
+ else:
+ assert device_path.startswith('../')
+ prefix_len = 3
- raise LinstorVolumeManagerError(
- 'Unable to find volume name from dev path `{}`'
- .format(device_path)
- )
+ res_name_end = device_path.find('/', prefix_len)
+ assert res_name_end != -1
+ return device_path[prefix_len:res_name_end]
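Both accepted path shapes ('/dev/drbd/by-res/<resource>/<volume>' and the relative '../<resource>/<volume>' form of a symlink target) reduce to the substring between the prefix and the next '/'. A standalone check of the same logic with a hypothetical volume name:

DRBD_BY_RES_PATH = '/dev/drbd/by-res/'

def volume_name_from_path(device_path):
    # Same prefix-stripping logic as the method above, shown outside the class.
    if device_path.startswith(DRBD_BY_RES_PATH):
        prefix_len = len(DRBD_BY_RES_PATH)
    else:
        assert device_path.startswith('../')
        prefix_len = 3
    res_name_end = device_path.find('/', prefix_len)
    assert res_name_end != -1
    return device_path[prefix_len:res_name_end]

assert volume_name_from_path('/dev/drbd/by-res/xcp-volume-0123/0') == 'xcp-volume-0123'
assert volume_name_from_path('../xcp-volume-0123/0') == 'xcp-volume-0123'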
def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
"""
@@ -664,6 +946,8 @@ def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
deleted VDI.
"""
+ assert volume_uuid != new_volume_uuid
+
self._logger(
'Trying to update volume UUID {} to {}...'
.format(volume_uuid, new_volume_uuid)
@@ -685,36 +969,45 @@ def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
.format(volume_uuid)
)
- new_volume_properties = self._get_volume_properties(
+        # 1. Copy metadata and volume_name into temporary variables.
+ metadata = volume_properties.get(self.PROP_METADATA)
+ volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
+
+ # 2. Switch to new volume namespace.
+ volume_properties.namespace = self._build_volume_namespace(
new_volume_uuid
)
- if list(new_volume_properties.items()):
+
+ if list(volume_properties.items()):
raise LinstorVolumeManagerError(
'Cannot update volume uuid {} to {}: '
.format(volume_uuid, new_volume_uuid) +
'this last one is not empty'
)
- assert volume_properties.namespace != \
- new_volume_properties.namespace
-
try:
- # 1. Mark new volume properties with PROP_UPDATING_UUID_SRC.
+ # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC.
# If we crash after that, the new properties can be removed
# properly.
- new_volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
- new_volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid
-
- # 2. Copy the properties.
- for property in [self.PROP_METADATA, self.PROP_VOLUME_NAME]:
- new_volume_properties[property] = \
- volume_properties.get(property)
+ volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
+ volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid
+
+ # 4. Copy the properties.
+ # Note: On new volumes, during clone for example, the metadata
+ # may be missing. So we must test it to avoid this error:
+ # "None has to be a str/unicode, but is "
+ if metadata:
+ volume_properties[self.PROP_METADATA] = metadata
+ volume_properties[self.PROP_VOLUME_NAME] = volume_name
- # 3. Ok!
- new_volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
+ # 5. Ok!
+ volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
except Exception as e:
try:
- new_volume_properties.clear()
+ # Clear the new volume properties in case of failure.
+ assert volume_properties.namespace == \
+ self._build_volume_namespace(new_volume_uuid)
+ volume_properties.clear()
except Exception as e:
self._logger(
'Failed to clear new volume properties: {} (ignoring...)'
@@ -725,11 +1018,21 @@ def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
)
try:
- # 4. After this point, it's ok we can remove the
+ # 6. After this point, it's ok we can remove the
# PROP_UPDATING_UUID_SRC property and clear the src properties
# without problems.
+
+ # 7. Switch to old volume namespace.
+ volume_properties.namespace = self._build_volume_namespace(
+ volume_uuid
+ )
volume_properties.clear()
- new_volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
+
+ # 8. Switch a last time to new volume namespace.
+ volume_properties.namespace = self._build_volume_namespace(
+ new_volume_uuid
+ )
+ volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
except Exception as e:
raise LinstorVolumeManagerError(
'Failed to clear volume properties '
@@ -743,7 +1046,7 @@ def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
'UUID update succeeded of {} to {}! (properties={})'
.format(
volume_uuid, new_volume_uuid,
- self._get_filtered_properties(new_volume_properties)
+ self._get_filtered_properties(volume_properties)
)
)
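The numbered steps above form a small crash-safe move of a property namespace: the destination is first marked with the source UUID so an interrupted run can be detected and rolled back, the data is then copied, and only afterwards are the source and the marker cleared. A toy sketch of the same protocol, with a plain dict standing in for the LINSTOR KV store (all names hypothetical):

def move_namespace(store, src_uuid, dst_uuid):
    assert src_uuid != dst_uuid and dst_uuid not in store
    # Mark the destination first: a crash leaves a detectable marker behind.
    store[dst_uuid] = {'updating-uuid-src': src_uuid}
    store[dst_uuid].update(store[src_uuid])
    # The data now exists under both UUIDs; dropping the source and then the
    # marker finishes the move.
    del store[src_uuid]
    del store[dst_uuid]['updating-uuid-src']

kv = {'uuid-a': {'volume-name': 'xcp-volume-0123'}}
move_namespace(kv, 'uuid-a', 'uuid-b')
print(kv)  # -> {'uuid-b': {'volume-name': 'xcp-volume-0123'}}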
@@ -788,6 +1091,73 @@ def get_usage_states(self, volume_uuid):
return states
+ def get_volume_openers(self, volume_uuid):
+ """
+ Get openers of a volume.
+ :param str volume_uuid: The volume uuid to monitor.
+        :return: A dictionary that contains openers.
+ :rtype: dict(str, obj)
+ """
+ return get_all_volume_openers(self.get_volume_name(volume_uuid), '0')
+
+
+ def get_volumes_with_name(self):
+ """
+        Give a volume dictionary that contains names actually owned.
+ :return: A volume/name dict.
+ :rtype: dict(str, str)
+ """
+ return self._get_volumes_by_property(self.REG_VOLUME_NAME)
+
+ def get_volumes_with_info(self):
+ """
+        Give a volume dictionary that contains VolumeInfos.
+ :return: A volume/VolumeInfo dict.
+ :rtype: dict(str, VolumeInfo)
+ """
+
+ volumes = {}
+
+ all_volume_info = self._get_volumes_info()
+ volume_names = self.get_volumes_with_name()
+ for volume_uuid, volume_name in volume_names.items():
+ if volume_name:
+ volume_info = all_volume_info.get(volume_name)
+ if volume_info:
+ volumes[volume_uuid] = volume_info
+ continue
+
+            # If this volume is not available, LINSTOR has probably
+            # been used directly without going through this API.
+ volumes[volume_uuid] = self.VolumeInfo('')
+
+ return volumes
+
+ def get_volumes_with_metadata(self):
+ """
+        Give a volume dictionary that contains metadata.
+ :return: A volume/metadata dict.
+ :rtype: dict(str, dict)
+ """
+
+ volumes = {}
+
+ metadata = self._get_volumes_by_property(self.REG_METADATA)
+ for volume_uuid, volume_metadata in metadata.items():
+ if volume_metadata:
+ volume_metadata = json.loads(volume_metadata)
+ if isinstance(volume_metadata, dict):
+ volumes[volume_uuid] = volume_metadata
+ continue
+ raise LinstorVolumeManagerError(
+ 'Expected dictionary in volume metadata: {}'
+ .format(volume_uuid)
+ )
+
+ volumes[volume_uuid] = {}
+
+ return volumes
+
def get_volume_metadata(self, volume_uuid):
"""
Get the metadata of a volume.
@@ -910,17 +1280,11 @@ def find_best_nodes():
rsc_name=clone_volume_name,
storage_pool=self._group_name
))
- for node_name in diskless_node_names:
- resources.append(linstor.ResourceData(
- node_name=node_name,
- rsc_name=clone_volume_name,
- diskless=True
- ))
# 5. Create resources!
- def clean(properties):
+ def clean():
try:
- self._destroy_volume(clone_uuid, properties)
+ self._destroy_volume(clone_uuid, force=True)
except Exception as e:
self._logger(
'Unable to destroy volume {} after shallow clone fail: {}'
@@ -928,12 +1292,16 @@ def clean(properties):
)
def create():
- try:
- volume_properties = self._create_volume_with_properties(
- clone_uuid, clone_volume_name, size,
- place_resources=False
- )
+            # Note: placed outside the try/except block because we only create the definition first.
+            # There is no reason to call `clean` before the real resources are created.
+ volume_properties = self._create_volume_with_properties(
+ clone_uuid, clone_volume_name, size,
+ place_resources=False
+ )
+            # After this point, `clean` can be called on any failure because the clone UUID
+            # is really unique. There is no risk of removing existing data.
+ try:
result = self._linstor.resource_create(resources)
error_str = self._get_error_str(result)
if error_str:
@@ -946,7 +1314,7 @@ def create():
)
return volume_properties
except Exception:
- clean(volume_properties)
+ clean()
raise
# Retry because we can get errors like this:
@@ -962,7 +1330,7 @@ def create():
self._volumes.add(clone_uuid)
return device_path
except Exception as e:
- clean(volume_properties)
+ clean()
raise
def remove_resourceless_volumes(self):
@@ -974,83 +1342,337 @@ def remove_resourceless_volumes(self):
"""
resource_names = self._fetch_resource_names()
- for volume_uuid, volume_name in self.volumes_with_name.items():
+ for volume_uuid, volume_name in self.get_volumes_with_name().items():
if not volume_name or volume_name not in resource_names:
+ # Don't force, we can be sure of what's happening.
self.destroy_volume(volume_uuid)
- def destroy(self, force=False):
+ def destroy(self):
"""
Destroy this SR. Object should not be used after that.
-        :param bool force: Try to destroy volumes before if true.
"""
- if (force):
- for volume_uuid in self._volumes:
- self.destroy_volume(volume_uuid)
+ if self._volumes:
+ raise LinstorVolumeManagerError(
+ 'Cannot destroy LINSTOR volume manager: '
+                'there are remaining volumes'
+ )
+
+ controller_is_running = self._controller_is_running()
+ uri = 'linstor://localhost'
+ try:
+ if controller_is_running:
+ self._start_controller(start=False)
+
+            # 1. Unmount the LINSTOR database volume.
+ self._mount_database_volume(
+ self.build_device_path(DATABASE_VOLUME_NAME),
+ mount=False,
+ force=True
+ )
+
+ # 2. Refresh instance.
+ self._start_controller(start=True)
+ self._linstor = self._create_linstor_instance(
+ uri, keep_uri_unmodified=True
+ )
- # TODO: Throw exceptions in the helpers below if necessary.
- # TODO: What's the required action if it exists remaining volumes?
+ # 3. Destroy database volume.
+ self._destroy_resource(DATABASE_VOLUME_NAME)
- self._destroy_resource_group(self._linstor, self._group_name)
+ # 4. Destroy group and storage pools.
+ self._destroy_resource_group(self._linstor, self._group_name)
+ for pool in self._get_storage_pools(force=True):
+ self._destroy_storage_pool(
+ self._linstor, pool.name, pool.node_name
+ )
+ except Exception as e:
+ self._start_controller(start=controller_is_running)
+ raise e
- pools = self._linstor.storage_pool_list_raise(
- filter_by_stor_pools=[self._group_name]
- ).storage_pools
- for pool in pools:
- self._destroy_storage_pool(
- self._linstor, pool.name, pool.node_name
+ try:
+ self._start_controller(start=False)
+            for file in glob.glob(DATABASE_PATH + '/*'):
+ os.remove(file)
+ except Exception as e:
+ util.SMlog(
+ 'Ignoring failure after LINSTOR SR destruction: {}'
+ .format(e)
)
- def find_up_to_date_diskfull_nodes(self, volume_uuid):
+ def find_up_to_date_diskful_nodes(self, volume_uuid):
"""
- Find all nodes that contain a specific volume using diskfull disks.
+ Find all nodes that contain a specific volume using diskful disks.
The disk must be up to data to be used.
:param str volume_uuid: The volume to use.
:return: The available nodes.
- :rtype: tuple(set(str), bool)
+ :rtype: tuple(set(str), str)
"""
volume_name = self.get_volume_name(volume_uuid)
- in_use = False
+ in_use_by = None
node_names = set()
- resource_list = self._linstor.resource_list_raise(
- filter_by_resources=[volume_name]
+
+ resource_states = filter(
+ lambda resource_state: resource_state.name == volume_name,
+ self._get_resource_cache().resource_states
)
- for resource_state in resource_list.resource_states:
+
+ for resource_state in resource_states:
volume_state = resource_state.volume_states[0]
if volume_state.disk_state == 'UpToDate':
node_names.add(resource_state.node_name)
if resource_state.in_use:
- in_use = True
+ in_use_by = resource_state.node_name
+
+ return (node_names, in_use_by)
+
+ def invalidate_resource_cache(self):
+ """
+ If resources are impacted by external commands like vhdutil,
+        it's necessary to call this function to invalidate the current
+        resource cache.
+ """
+ self._mark_resource_cache_as_dirty()
- return (node_names, in_use)
+ def has_node(self, node_name):
+ """
+ Check if a node exists in the LINSTOR database.
+ :rtype: bool
+ """
+ result = self._linstor.node_list()
+ error_str = self._get_error_str(result)
+ if error_str:
+ raise LinstorVolumeManagerError(
+ 'Failed to list nodes using `{}`: {}'
+ .format(node_name, error_str)
+ )
+ return bool(result[0].node(node_name))
+
+ def create_node(self, node_name, ip):
+ """
+ Create a new node in the LINSTOR database.
+ :param str node_name: Node name to use.
+ :param str ip: Host IP to communicate.
+ """
+ result = self._linstor.node_create(
+ node_name,
+ linstor.consts.VAL_NODE_TYPE_CMBD,
+ ip
+ )
+ errors = self._filter_errors(result)
+ if errors:
+ error_str = self._get_error_str(errors)
+ raise LinstorVolumeManagerError(
+ 'Failed to create node `{}`: {}'.format(node_name, error_str)
+ )
+
+ def destroy_node(self, node_name):
+ """
+ Destroy a node in the LINSTOR database.
+ :param str node_name: Node name to remove.
+ """
+ result = self._linstor.node_delete(node_name)
+ errors = self._filter_errors(result)
+ if errors:
+ error_str = self._get_error_str(errors)
+ raise LinstorVolumeManagerError(
+ 'Failed to destroy node `{}`: {}'.format(node_name, error_str)
+ )
+
+ def get_nodes_info(self):
+ """
+        Get all nodes and their statuses, whether used by the pool or not.
+ :rtype: dict(str, dict)
+ """
+ try:
+ nodes = {}
+ for node in self._linstor.node_list_raise().nodes:
+ nodes[node.name] = node.connection_status
+ return nodes
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Failed to get all nodes: `{}`'.format(e)
+ )
+
+ def get_storage_pools_info(self):
+ """
+ Give all storage pools of current group name.
+ :rtype: dict(str, list)
+ """
+ storage_pools = {}
+ for pool in self._get_storage_pools(force=True):
+ if pool.node_name not in storage_pools:
+ storage_pools[pool.node_name] = []
+
+ size = -1
+ capacity = -1
+
+ space = pool.free_space
+ if space:
+ size = space.free_capacity
+ if size < 0:
+ size = -1
+ else:
+ size *= 1024
+ capacity = space.total_capacity
+ if capacity <= 0:
+ capacity = -1
+ else:
+ capacity *= 1024
+
+ storage_pools[pool.node_name].append({
+ 'storage-pool-name': pool.name,
+ 'uuid': pool.uuid,
+ 'free-size': size,
+ 'capacity': capacity
+ })
+
+ return storage_pools
+
+ def get_resources_info(self):
+ """
+ Give all resources of current group name.
+ :rtype: dict(str, list)
+ """
+ resources = {}
+ resource_list = self._linstor.resource_list_raise()
+ for resource in resource_list.resources:
+ if resource.name not in resources:
+ resources[resource.name] = {}
+
+ resources[resource.name][resource.node_name] = {
+ 'volumes': [],
+ 'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags,
+ 'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags
+ }
+
+ for volume in resource.volumes:
+ # We ignore diskless pools of the form "DfltDisklessStorPool".
+ if volume.storage_pool_name != self._group_name:
+ continue
+
+ usable_size = volume.usable_size
+ if usable_size < 0:
+ usable_size = -1
+ else:
+ usable_size *= 1024
+
+ allocated_size = volume.allocated_size
+ if allocated_size < 0:
+ allocated_size = -1
+ else:
+ allocated_size *= 1024
+
+ resources[resource.name][resource.node_name]['volumes'].append({
+ 'storage-pool-name': volume.storage_pool_name,
+ 'uuid': volume.uuid,
+ 'number': volume.number,
+ 'device-path': volume.device_path,
+ 'usable-size': usable_size,
+ 'allocated-size': allocated_size
+ })
+
+ for resource_state in resource_list.resource_states:
+ resource = resources[resource_state.rsc_name][resource_state.node_name]
+ resource['in-use'] = resource_state.in_use
+
+ volumes = resource['volumes']
+ for volume_state in resource_state.volume_states:
+ volume = next((x for x in volumes if x['number'] == volume_state.number), None)
+ if volume:
+ volume['disk-state'] = volume_state.disk_state
+
+ return resources
@classmethod
def create_sr(
- cls, uri, group_name, node_names, redundancy,
- thin_provisioning=False,
+ cls, group_name, ips, redundancy,
+ thin_provisioning, auto_quorum,
logger=default_logger.__func__
):
"""
Create a new SR on the given nodes.
- :param str uri: URI to communicate with the LINSTOR controller.
:param str group_name: The SR group_name to use.
- :param list[str] node_names: String list of nodes.
+ :param set(str) ips: Node ips.
:param int redundancy: How many copy of volumes should we store?
+ :param bool thin_provisioning: Use thin or thick provisioning.
+ :param bool auto_quorum: DB quorum is monitored by LINSTOR.
:param function logger: Function to log messages.
:return: A new LinstorSr instance.
:rtype: LinstorSr
"""
+ try:
+ cls._start_controller(start=True)
+ sr = cls._create_sr(
+ group_name,
+ ips,
+ redundancy,
+ thin_provisioning,
+ auto_quorum,
+ logger
+ )
+ finally:
+ # Controller must be stopped and volume unmounted because
+ # it is the role of the drbd-reactor daemon to do the right
+ # actions.
+ cls._start_controller(start=False)
+ cls._mount_volume(
+ cls.build_device_path(DATABASE_VOLUME_NAME),
+ DATABASE_PATH,
+ mount=False
+ )
+ return sr
+
+ @classmethod
+ def _create_sr(
+ cls, group_name, ips, redundancy,
+ thin_provisioning, auto_quorum,
+ logger=default_logger.__func__
+ ):
# 1. Check if SR already exists.
- lin = cls._create_linstor_instance(uri)
+ uri = 'linstor://localhost'
+
+ lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)
+
+        node_names = list(ips.keys())
+        for node_name, ip in ips.items():
+ while True:
+ # Try to create node.
+ result = lin.node_create(
+ node_name,
+ linstor.consts.VAL_NODE_TYPE_CMBD,
+ ip
+ )
+
+ errors = cls._filter_errors(result)
+ if cls._check_errors(
+ errors, [linstor.consts.FAIL_EXISTS_NODE]
+ ):
+ # If it already exists, remove, then recreate.
+ result = lin.node_delete(node_name)
+ error_str = cls._get_error_str(result)
+ if error_str:
+ raise LinstorVolumeManagerError(
+ 'Failed to remove old node `{}`: {}'
+ .format(node_name, error_str)
+ )
+ elif not errors:
+ break # Created!
+ else:
+ raise LinstorVolumeManagerError(
+ 'Failed to create node `{}` with ip `{}`: {}'.format(
+ node_name, ip, cls._get_error_str(errors)
+ )
+ )
+
driver_pool_name = group_name
+ base_group_name = group_name
group_name = cls._build_group_name(group_name)
pools = lin.storage_pool_list_raise(filter_by_stor_pools=[group_name])
-
- # TODO: Maybe if the SR already exists and if the nodes are the same,
- # we can try to use it directly.
pools = pools.storage_pools
if pools:
existing_node_names = [pool.node_name for pool in pools]
@@ -1076,9 +1698,14 @@ def create_sr(
)
# 2. Create storage pool on each node + resource group.
+ reg_volume_group_not_found = re.compile(
+ ".*Volume group '.*' not found$"
+ )
+
i = 0
try:
# 2.a. Create storage pools.
+ storage_pool_count = 0
while i < len(node_names):
node_name = node_names[i]
@@ -1089,17 +1716,35 @@ def create_sr(
driver_pool_name=driver_pool_name
)
- error_str = cls._get_error_str(result)
- if error_str:
- raise LinstorVolumeManagerError(
- 'Could not create SP `{}` on node `{}`: {}'.format(
- group_name,
- node_name,
- error_str
+ errors = linstor.Linstor.filter_api_call_response_errors(
+ result
+ )
+ if errors:
+ if len(errors) == 1 and errors[0].is_error(
+ linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR
+ ) and reg_volume_group_not_found.match(errors[0].message):
+ logger(
+ 'Volume group `{}` not found on `{}`. Ignoring...'
+ .format(group_name, node_name)
)
- )
+ cls._destroy_storage_pool(lin, group_name, node_name)
+ else:
+ error_str = cls._get_error_str(result)
+ raise LinstorVolumeManagerError(
+ 'Could not create SP `{}` on node `{}`: {}'
+ .format(group_name, node_name, error_str)
+ )
+ else:
+ storage_pool_count += 1
i += 1
+ if not storage_pool_count:
+ raise LinstorVolumeManagerError(
+                    'Unable to create SR `{}`: no volume group found'.format(
+ group_name,
+ )
+ )
+
# 2.b. Create resource group.
result = lin.resource_group_create(
name=group_name,
@@ -1125,30 +1770,78 @@ def create_sr(
)
)
- # 3. Remove storage pools/resource/volume group in the case of errors.
+ # 3. Create the LINSTOR database volume and mount it.
+ try:
+ logger('Creating database volume...')
+ volume_path = cls._create_database_volume(
+ lin, group_name, node_names, redundancy, auto_quorum
+ )
+ except LinstorVolumeManagerError as e:
+ if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
+ logger('Destroying database volume after creation fail...')
+ cls._force_destroy_database_volume(lin, group_name)
+ raise
+
+ try:
+ logger('Mounting database volume...')
+
+                # First we must disable the controller to safely move the
+                # LINSTOR config.
+ cls._start_controller(start=False)
+
+ cls._mount_database_volume(volume_path)
+ except Exception as e:
+ # Ensure we are connected because controller has been
+ # restarted during mount call.
+ logger('Destroying database volume after mount fail...')
+
+ try:
+ cls._start_controller(start=True)
+ except Exception:
+ pass
+
+ lin = cls._create_linstor_instance(
+ uri, keep_uri_unmodified=True
+ )
+ cls._force_destroy_database_volume(lin, group_name)
+ raise e
+
+ cls._start_controller(start=True)
+ lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)
+
+ # 4. Remove storage pools/resource/volume group in the case of errors.
except Exception as e:
+ logger('Destroying resource group and storage pools after fail...')
try:
cls._destroy_resource_group(lin, group_name)
- except Exception:
+ except Exception as e2:
+ logger('Failed to destroy resource group: {}'.format(e2))
pass
j = 0
i = min(i, len(node_names) - 1)
while j <= i:
try:
cls._destroy_storage_pool(lin, group_name, node_names[j])
- except Exception:
+ except Exception as e2:
+                    logger('Failed to destroy storage pool: {}'.format(e2))
pass
j += 1
raise e
- # 4. Return new instance.
+ # 5. Return new instance.
instance = cls.__new__(cls)
- instance._uri = uri
instance._linstor = lin
instance._logger = logger
instance._redundancy = redundancy
+ instance._base_group_name = base_group_name
instance._group_name = group_name
instance._volumes = set()
+ instance._storage_pools_time = 0
+ instance._kv_cache = instance._create_kv_cache()
+ instance._resource_cache = None
+ instance._resource_cache_dirty = True
+ instance._volume_info_cache = None
+ instance._volume_info_cache_dirty = True
return instance
@classmethod
@@ -1196,6 +1889,32 @@ def round_down_volume_size(cls, volume_size):
# Private helpers.
# --------------------------------------------------------------------------
+ def _create_kv_cache(self):
+ self._kv_cache = self._create_linstor_kv('/')
+ self._kv_cache_dirty = False
+ return self._kv_cache
+
+ def _get_kv_cache(self):
+ if self._kv_cache_dirty:
+ self._kv_cache = self._create_kv_cache()
+ return self._kv_cache
+
+ def _create_resource_cache(self):
+ self._resource_cache = self._linstor.resource_list_raise()
+ self._resource_cache_dirty = False
+ return self._resource_cache
+
+ def _get_resource_cache(self):
+ if self._resource_cache_dirty:
+ self._resource_cache = self._create_resource_cache()
+ return self._resource_cache
+
+ def _mark_resource_cache_as_dirty(self):
+ self._resource_cache_dirty = True
+ self._volume_info_cache_dirty = True
+
+ # --------------------------------------------------------------------------
+
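The `_create_*`/`_get_*`/`_mark_*_as_dirty` trio above is a classic dirty-flag cache: reads reuse the last fetch until a change invalidates it. A generic sketch of the pattern, separate from the class above and purely illustrative:

class CachedView(object):
    def __init__(self, fetch):
        self._fetch = fetch   # Expensive call, e.g. a LINSTOR resource list.
        self._cache = None
        self._dirty = True

    def get(self):
        # Refetch only when a previous change marked the cache dirty.
        if self._dirty:
            self._cache = self._fetch()
            self._dirty = False
        return self._cache

    def invalidate(self):
        # Call after any change done outside this object (resize, vhdutil...).
        self._dirty = True

view = CachedView(lambda: {'fetched': True})
view.get()          # Performs the fetch.
view.get()          # Served from the cache.
view.invalidate()   # The next get() refetches.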
def _ensure_volume_exists(self, volume_uuid):
if volume_uuid not in self._volumes:
raise LinstorVolumeManagerError(
@@ -1224,12 +1943,13 @@ def _fetch_resource_names(self):
resource_names.add(dfn.name)
return resource_names
- def _get_volumes_info(self, filter=None):
+ def _get_volumes_info(self, volume_name=None):
all_volume_info = {}
- resources = self._linstor.resource_list_raise(
- filter_by_resources=filter
- )
- for resource in resources.resources:
+
+ if not self._volume_info_cache_dirty:
+ return self._volume_info_cache
+
+ for resource in self._get_resource_cache().resources:
if resource.name not in all_volume_info:
current = all_volume_info[resource.name] = self.VolumeInfo(
resource.name
@@ -1237,6 +1957,9 @@ def _get_volumes_info(self, filter=None):
else:
current = all_volume_info[resource.name]
+ if linstor.consts.FLAG_DISKLESS not in resource.flags:
+ current.diskful.append(resource.node_name)
+
for volume in resource.volumes:
# We ignore diskless pools of the form "DfltDisklessStorPool".
if volume.storage_pool_name == self._group_name:
@@ -1245,22 +1968,32 @@ def _get_volumes_info(self, filter=None):
'Failed to get allocated size of `{}` on `{}`'
.format(resource.name, volume.storage_pool_name)
)
- current.physical_size += volume.allocated_size
+ allocated_size = volume.allocated_size
- if volume.usable_size < 0:
- raise LinstorVolumeManagerError(
- 'Failed to get usable size of `{}` on `{}`'
- .format(resource.name, volume.storage_pool_name)
- )
- virtual_size = volume.usable_size
+ current.allocated_size = current.allocated_size and \
+ max(current.allocated_size, allocated_size) or \
+ allocated_size
- current.virtual_size = current.virtual_size and \
- min(current.virtual_size, virtual_size) or virtual_size
+ usable_size = volume.usable_size
+ if usable_size > 0 and (
+ usable_size < current.virtual_size or
+ not current.virtual_size
+ ):
+ current.virtual_size = usable_size
+
+ if current.virtual_size <= 0:
+ raise LinstorVolumeManagerError(
+ 'Failed to get usable size of `{}` on `{}`'
+ .format(resource.name, volume.storage_pool_name)
+ )
for current in all_volume_info.values():
- current.physical_size *= 1024
+ current.allocated_size *= 1024
current.virtual_size *= 1024
+ self._volume_info_cache_dirty = False
+ self._volume_info_cache = all_volume_info
+
return all_volume_info
def _get_volume_node_names_and_size(self, volume_name):
@@ -1289,12 +2022,8 @@ def _get_volume_node_names_and_size(self, volume_name):
return (node_names, size * 1024)
def _compute_size(self, attr):
- pools = self._linstor.storage_pool_list_raise(
- filter_by_stor_pools=[self._group_name]
- ).storage_pools
-
capacity = 0
- for pool in pools:
+ for pool in self._get_storage_pools(force=True):
space = pool.free_space
if space:
size = getattr(space, attr)
@@ -1308,45 +2037,104 @@ def _compute_size(self, attr):
def _get_node_names(self):
node_names = set()
- pools = self._linstor.storage_pool_list_raise(
- filter_by_stor_pools=[self._group_name]
- ).storage_pools
- for pool in pools:
+ for pool in self._get_storage_pools():
node_names.add(pool.node_name)
return node_names
- def _check_volume_creation_errors(self, result, volume_uuid):
- errors = self._filter_errors(result)
- if self._check_errors(errors, [
- linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN
- ]):
- raise LinstorVolumeManagerError(
- 'Failed to create volume `{}` from SR `{}`, it already exists'
- .format(volume_uuid, self._group_name),
- LinstorVolumeManagerError.ERR_VOLUME_EXISTS
- )
+ def _get_storage_pools(self, force=False):
+ cur_time = time.time()
+        elapsed_time = cur_time - self._storage_pools_time
-        if errors:
+        if force or elapsed_time >= self.STORAGE_POOLS_FETCH_INTERVAL:
+ self._storage_pools = self._linstor.storage_pool_list_raise(
+ filter_by_stor_pools=[self._group_name]
+ ).storage_pools
+ self._storage_pools_time = time.time()
+
+ return self._storage_pools
+
+ def _create_volume(
+ self, volume_uuid, volume_name, size, place_resources,
+ no_diskless=False
+ ):
+ if no_diskless and not place_resources:
raise LinstorVolumeManagerError(
- 'Failed to create volume `{}` from SR `{}`: {}'.format(
- volume_uuid,
- self._group_name,
- self._get_error_str(errors)
- )
+ 'Could not create volume `{}` from SR `{}`: it\'s impossible '
+ .format(volume_uuid, self._group_name) +
+ 'to force no diskless without placing resources'
)
- def _create_volume(self, volume_uuid, volume_name, size, place_resources):
size = self.round_up_volume_size(size)
+ self._mark_resource_cache_as_dirty()
- self._check_volume_creation_errors(self._linstor.resource_group_spawn(
- rsc_grp_name=self._group_name,
- rsc_dfn_name=volume_name,
- vlm_sizes=['{}B'.format(size)],
- definitions_only=not place_resources
- ), volume_uuid)
+ resources = []
+ if no_diskless:
+ for node_name in self._get_node_names():
+ resources.append(linstor.ResourceData(
+ node_name=node_name,
+ rsc_name=volume_name,
+ storage_pool=self._group_name
+ ))
+
+ def create_definition():
+ self._check_volume_creation_errors(
+ self._linstor.resource_group_spawn(
+ rsc_grp_name=self._group_name,
+ rsc_dfn_name=volume_name,
+ vlm_sizes=['{}B'.format(size)],
+ definitions_only=True
+ ),
+ volume_uuid,
+ self._group_name
+ )
+ self._configure_volume_peer_slots(self._linstor, volume_name)
+
+ def clean():
+ try:
+ self._destroy_volume(volume_uuid, force=True)
+ except Exception as e:
+ self._logger(
+ 'Unable to destroy volume {} after creation fail: {}'
+ .format(volume_uuid, e)
+ )
+
+ def create():
+ try:
+ create_definition()
+ if no_diskless:
+ # Create a physical resource on each node.
+ result = self._linstor.resource_create(resources)
+ error_str = self._get_error_str(result)
+ if error_str:
+ raise LinstorVolumeManagerError(
+ 'Could not create volume `{}` from SR `{}`: {}'.format(
+ volume_uuid, self._group_name, error_str
+ )
+ )
+ elif place_resources:
+ # Basic case when we use the default redundancy of the group.
+ self._check_volume_creation_errors(
+ self._linstor.resource_auto_place(
+ rsc_name=volume_name,
+ place_count=self._redundancy,
+ diskless_on_remaining=not no_diskless
+ ),
+ volume_uuid,
+ self._group_name
+ )
+ except LinstorVolumeManagerError as e:
+ if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
+ clean()
+ raise
+ except Exception:
+ clean()
+ raise
+
+ util.retry(create, maxretry=5)
def _create_volume_with_properties(
- self, volume_uuid, volume_name, size, place_resources
+ self, volume_uuid, volume_name, size, place_resources,
+ no_diskless=False
):
if self.check_volume_exists(volume_uuid):
raise LinstorVolumeManagerError(
@@ -1375,9 +2163,11 @@ def _create_volume_with_properties(
volume_properties[self.PROP_VOLUME_NAME] = volume_name
self._create_volume(
- volume_uuid, volume_name, size, place_resources
+ volume_uuid, volume_name, size, place_resources, no_diskless
)
+ assert volume_properties.namespace == \
+ self._build_volume_namespace(volume_uuid)
return volume_properties
except LinstorVolumeManagerError as e:
# Do not destroy existing resource!
@@ -1385,12 +2175,8 @@ def _create_volume_with_properties(
# before the `self._create_volume` case.
# It can only happen if the same volume uuid is used in the same
# call in another host.
- if e.code == LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
- raise
- self._force_destroy_volume(volume_uuid, volume_properties)
- raise
- except Exception:
- self._force_destroy_volume(volume_uuid, volume_properties)
+ if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
+ self._destroy_volume(volume_uuid, force=True)
raise
def _find_device_path(self, volume_uuid, volume_name):
@@ -1417,68 +2203,73 @@ def _find_device_path(self, volume_uuid, volume_name):
def _request_device_path(self, volume_uuid, volume_name, activate=False):
node_name = socket.gethostname()
- resources = self._linstor.resource_list(
- filter_by_nodes=[node_name],
- filter_by_resources=[volume_name]
+
+        resources = list(filter(
+            lambda resource: resource.node_name == node_name and
+            resource.name == volume_name,
+            self._get_resource_cache().resources
+        ))
- if not resources or not resources[0]:
- raise LinstorVolumeManagerError(
- 'No response list for dev path of `{}`'.format(volume_uuid)
- )
- if isinstance(resources[0], linstor.responses.ResourceResponse):
- if not resources[0].resources:
- if activate:
- self._activate_device_path(node_name, volume_name)
- return self._request_device_path(volume_uuid, volume_name)
- raise LinstorVolumeManagerError(
- 'Empty dev path for `{}`, but definition "seems" to exist'
- .format(volume_uuid)
+ if not resources:
+ if activate:
+ self._mark_resource_cache_as_dirty()
+ self._activate_device_path(
+ self._linstor, node_name, volume_name
)
- # Contains a path of the /dev/drbd form.
- return resources[0].resources[0].volumes[0].device_path
-
- raise LinstorVolumeManagerError(
- 'Unable to get volume dev path `{}`: {}'.format(
- volume_uuid, str(resources[0])
+ return self._request_device_path(volume_uuid, volume_name)
+ raise LinstorVolumeManagerError(
+ 'Empty dev path for `{}`, but definition "seems" to exist'
+ .format(volume_uuid)
)
- )
-
- def _activate_device_path(self, node_name, volume_name):
- result = self._linstor.resource_create([
- linstor.ResourceData(node_name, volume_name, diskless=True)
- ])
- if linstor.Linstor.all_api_responses_no_error(result):
- return
- errors = linstor.Linstor.filter_api_call_response_errors(result)
- if len(errors) == 1 and errors[0].is_error(
- linstor.consts.FAIL_EXISTS_RSC
- ):
- return
-
- raise LinstorVolumeManagerError(
- 'Unable to activate device path of `{}` on node `{}`: {}'
- .format(volume_name, node_name, ', '.join(
- [str(x) for x in result]))
- )
+ # Contains a path of the /dev/drbd form.
+ return resources[0].volumes[0].device_path
- def _destroy_resource(self, resource_name):
+ def _destroy_resource(self, resource_name, force=False):
result = self._linstor.resource_dfn_delete(resource_name)
error_str = self._get_error_str(result)
- if error_str:
+ if not error_str:
+ self._mark_resource_cache_as_dirty()
+ return
+
+ if not force:
+ self._mark_resource_cache_as_dirty()
raise LinstorVolumeManagerError(
- 'Could not destroy resource `{}` from SR `{}`: {}'
+ 'Could not destroy resource `{}` from SR `{}`: {}'
.format(resource_name, self._group_name, error_str)
)
- def _destroy_volume(self, volume_uuid, volume_properties):
- assert volume_properties.namespace == \
- self._build_volume_namespace(volume_uuid)
+ # If force is used, ensure there is no opener.
+ all_openers = get_all_volume_openers(resource_name, '0')
+        for openers in all_openers.values():
+ if openers:
+ self._mark_resource_cache_as_dirty()
+ raise LinstorVolumeManagerError(
+ 'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)'
+ .format(resource_name, self._group_name, error_str, all_openers)
+ )
+
+ # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue?
+ resource_states = filter(
+ lambda resource_state: resource_state.name == resource_name,
+ self._get_resource_cache().resource_states
+ )
+
+ # Mark only after computation of states.
+ self._mark_resource_cache_as_dirty()
+
+ for resource_state in resource_states:
+ volume_state = resource_state.volume_states[0]
+ if resource_state.in_use:
+ demote_drbd_resource(resource_state.node_name, resource_name)
+ break
+ self._destroy_resource(resource_name)
+ def _destroy_volume(self, volume_uuid, force=False):
+ volume_properties = self._get_volume_properties(volume_uuid)
try:
volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
if volume_name in self._fetch_resource_names():
- self._destroy_resource(volume_name)
+ self._destroy_resource(volume_name, force)
# Assume this call is atomic.
volume_properties.clear()
@@ -1487,19 +2278,8 @@ def _destroy_volume(self, volume_uuid, volume_properties):
'Cannot destroy volume `{}`: {}'.format(volume_uuid, e)
)
- def _force_destroy_volume(self, volume_uuid, volume_properties):
- try:
- self._destroy_volume(volume_uuid, volume_properties)
- except Exception as e:
- self._logger('Ignore fail: {}'.format(e))
-
def _build_volumes(self, repair):
- properties = linstor.KV(
- self._get_store_name(),
- uri=self._uri,
- namespace=self._build_volume_namespace()
- )
-
+ properties = self._kv_cache
resource_names = self._fetch_resource_names()
self._volumes = set()
@@ -1517,9 +2297,7 @@ def _build_volumes(self, repair):
self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
)
for volume_uuid, not_exists in existing_volumes.items():
- properties.namespace = self._build_volume_namespace(
- volume_uuid
- )
+ properties.namespace = self._build_volume_namespace(volume_uuid)
src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
if src_uuid:
@@ -1569,7 +2347,7 @@ def _build_volumes(self, repair):
# Little optimization, don't call `self._destroy_volume`,
# we already have resource name list.
if volume_name in resource_names:
- self._destroy_resource(volume_name)
+ self._destroy_resource(volume_name, force=True)
# Assume this call is atomic.
properties.clear()
@@ -1579,37 +2357,42 @@ def _build_volumes(self, repair):
'Cannot clean volume {}: {}'.format(volume_uuid, e)
)
+                # The volume can't be removed, maybe it's still in use.
+                # In this case, rename it with the "DELETED_" prefix.
+                # This prefix is mandatory if there is a snapshot transaction
+                # to roll back, because the original VDI UUID could otherwise
+                # be renamed to the UUID we are trying to delete...
+ if not volume_uuid.startswith('DELETED_'):
+ self.update_volume_uuid(
+ volume_uuid, 'DELETED_' + volume_uuid, force=True
+ )
+
for dest_uuid, src_uuid in updating_uuid_volumes.items():
- dest_properties = self._get_volume_properties(dest_uuid)
- if int(dest_properties.get(self.PROP_NOT_EXISTS) or
- self.STATE_EXISTS):
- dest_properties.clear()
+ dest_namespace = self._build_volume_namespace(dest_uuid)
+
+ properties.namespace = dest_namespace
+ if int(properties.get(self.PROP_NOT_EXISTS)):
+ properties.clear()
continue
- src_properties = self._get_volume_properties(src_uuid)
- src_properties.clear()
+ properties.namespace = self._build_volume_namespace(src_uuid)
+ properties.clear()
- dest_properties.pop(self.PROP_UPDATING_UUID_SRC)
+ properties.namespace = dest_namespace
+ properties.pop(self.PROP_UPDATING_UUID_SRC)
if src_uuid in self._volumes:
self._volumes.remove(src_uuid)
self._volumes.add(dest_uuid)
def _get_sr_properties(self):
- return linstor.KV(
- self._get_store_name(),
- uri=self._uri,
- namespace=self._build_sr_namespace()
- )
+ return self._create_linstor_kv(self._build_sr_namespace())
def _get_volumes_by_property(
self, reg_prop, ignore_inexisting_volumes=True
):
- base_properties = linstor.KV(
- self._get_store_name(),
- uri=self._uri,
- namespace=self._build_volume_namespace()
- )
+ base_properties = self._get_kv_cache()
+ base_properties.namespace = self._build_volume_namespace()
volume_properties = {}
for volume_uuid in self._volumes:
@@ -1625,15 +2408,17 @@ def _get_volumes_by_property(
return volume_properties
- def _get_volume_properties(self, volume_uuid):
+ def _create_linstor_kv(self, namespace):
return linstor.KV(
- self._get_store_name(),
- uri=self._uri,
- namespace=self._build_volume_namespace(volume_uuid)
+ self._group_name,
+ uri=self._linstor.controller_host(),
+ namespace=namespace
)
- def _get_store_name(self):
- return 'xcp-sr-{}'.format(self._group_name)
+ def _get_volume_properties(self, volume_uuid):
+ properties = self._get_kv_cache()
+ properties.namespace = self._build_volume_namespace(volume_uuid)
+ return properties
@classmethod
def _build_sr_namespace(cls):
@@ -1653,46 +2438,429 @@ def _get_error_str(cls, result):
])
@classmethod
- def _create_linstor_instance(cls, uri):
- def connect():
+ def _create_linstor_instance(
+ cls, uri, keep_uri_unmodified=False, attempt_count=30
+ ):
+ retry = False
+
+ def connect(uri):
+ if not uri:
+ uri = get_controller_uri()
+ if not uri:
+ raise LinstorVolumeManagerError(
+ 'Unable to find controller uri...'
+ )
instance = linstor.Linstor(uri, keep_alive=True)
instance.connect()
return instance
+ try:
+ return connect(uri)
+ except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
+ pass
+
+ if not keep_uri_unmodified:
+ uri = None
+
return util.retry(
- connect,
- maxretry=60,
- exceptions=[linstor.errors.LinstorNetworkError]
+ lambda: connect(uri),
+ maxretry=attempt_count,
+ period=1,
+ exceptions=[
+ linstor.errors.LinstorNetworkError,
+ LinstorVolumeManagerError
+ ]
)
@classmethod
- def _destroy_storage_pool(cls, lin, group_name, node_name):
- result = lin.storage_pool_delete(node_name, group_name)
+ def _configure_volume_peer_slots(cls, lin, volume_name):
+ result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3)
error_str = cls._get_error_str(result)
if error_str:
raise LinstorVolumeManagerError(
- 'Failed to destroy SP `{}` on node `{}`: {}'.format(
- group_name,
- node_name,
- error_str
+ 'Could not configure volume peer slots of {}: {}'
+ .format(volume_name, error_str)
+ )
+
+ @classmethod
+ def _activate_device_path(cls, lin, node_name, volume_name):
+ result = lin.resource_create([
+ linstor.ResourceData(node_name, volume_name, diskless=True)
+ ])
+ if linstor.Linstor.all_api_responses_no_error(result):
+ return
+ errors = linstor.Linstor.filter_api_call_response_errors(result)
+ if len(errors) == 1 and errors[0].is_error(
+ linstor.consts.FAIL_EXISTS_RSC
+ ):
+ return
+
+ raise LinstorVolumeManagerError(
+ 'Unable to activate device path of `{}` on node `{}`: {}'
+ .format(volume_name, node_name, ', '.join(
+ [str(x) for x in result]))
+ )
+
+ @classmethod
+ def _request_database_path(cls, lin, activate=False):
+ node_name = socket.gethostname()
+
+ try:
+ resources = list(filter(
+ lambda resource: resource.node_name == node_name and
+ resource.name == DATABASE_VOLUME_NAME,
+ lin.resource_list_raise().resources
+ ))
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Unable to get resources during database creation: {}'
+ .format(e)
+ )
+
+ if not resources:
+ if activate:
+ cls._activate_device_path(
+ lin, node_name, DATABASE_VOLUME_NAME
+ )
+ # The diskless resource has been created, retry without activating again.
+ return cls._request_database_path(
+ lin, activate=False
+ )
+ raise LinstorVolumeManagerError(
+ 'Empty dev path for `{}`, but definition "seems" to exist'
+ .format(DATABASE_PATH)
)
+ # Contains a path of the form `/dev/drbd<minor>`.
+ return resources[0].volumes[0].device_path
@classmethod
- def _destroy_resource_group(cls, lin, group_name):
- result = lin.resource_group_delete(group_name)
+ def _create_database_volume(
+ cls, lin, group_name, node_names, redundancy, auto_quorum
+ ):
+ try:
+ dfns = lin.resource_dfn_list_raise().resource_definitions
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Unable to get definitions during database creation: {}'
+ .format(e)
+ )
+
+ if dfns:
+ raise LinstorVolumeManagerError(
+ 'Could not create volume `{}` from SR `{}`, '.format(
+ DATABASE_VOLUME_NAME, group_name
+ ) + 'LINSTOR volume list must be empty.'
+ )
+
+ # Workaround to use thin LVM. Without this call an error is returned:
+ # "Not enough available nodes"
+ # The reason is unclear, but this command protects against this bug.
+ try:
+ pools = lin.storage_pool_list_raise(
+ filter_by_stor_pools=[group_name]
+ )
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Failed to get storage pool list before database creation: {}'
+ .format(e)
+ )
+
+ # Ensure we have a correct list of storage pools.
+ nodes_with_pool = [pool.node_name for pool in pools.storage_pools]
+ assert nodes_with_pool # We must have at least one storage pool!
+ for node_name in nodes_with_pool:
+ assert node_name in node_names
+ util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool))
+
+ # Create the database definition.
+ size = cls.round_up_volume_size(DATABASE_SIZE)
+ cls._check_volume_creation_errors(lin.resource_group_spawn(
+ rsc_grp_name=group_name,
+ rsc_dfn_name=DATABASE_VOLUME_NAME,
+ vlm_sizes=['{}B'.format(size)],
+ definitions_only=True
+ ), DATABASE_VOLUME_NAME, group_name)
+ cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME)
+
+ # Create real resources on the first nodes.
+ resources = []
+
+ diskful_nodes = []
+ diskless_nodes = []
+ for node_name in node_names:
+ if node_name in nodes_with_pool:
+ diskful_nodes.append(node_name)
+ else:
+ diskless_nodes.append(node_name)
+
+ assert diskful_nodes
+ for node_name in diskful_nodes[:redundancy]:
+ util.SMlog('Create database diskful on {}'.format(node_name))
+ resources.append(linstor.ResourceData(
+ node_name=node_name,
+ rsc_name=DATABASE_VOLUME_NAME,
+ storage_pool=group_name
+ ))
+ # Create diskless resources on the remaining set.
+ for node_name in diskful_nodes[redundancy:] + diskless_nodes:
+ util.SMlog('Create database diskless on {}'.format(node_name))
+ resources.append(linstor.ResourceData(
+ node_name=node_name,
+ rsc_name=DATABASE_VOLUME_NAME,
+ diskless=True
+ ))
+
+ result = lin.resource_create(resources)
error_str = cls._get_error_str(result)
if error_str:
raise LinstorVolumeManagerError(
- 'Failed to destroy RG `{}`: {}'.format(group_name, error_str)
+ 'Could not create database volume from SR `{}`: {}'.format(
+ group_name, error_str
+ )
+ )
+
+ # We must modify the quorum. Otherwise the drbd-reactor daemon
+ # can't be used correctly.
+ if auto_quorum:
+ result = lin.resource_dfn_modify(DATABASE_VOLUME_NAME, {
+ 'DrbdOptions/auto-quorum': 'disabled',
+ 'DrbdOptions/Resource/quorum': 'majority'
+ })
+ error_str = cls._get_error_str(result)
+ if error_str:
+ raise LinstorVolumeManagerError(
+ 'Could not activate quorum on database volume: {}'
+ .format(error_str)
+ )
+
+ # Create database and ensure path exists locally and
+ # on replicated devices.
+ current_device_path = cls._request_database_path(lin, activate=True)
+
+ # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be
+ # plugged.
+ for node_name in node_names:
+ cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME)
+
+ # We use realpath here to get the /dev/drbd path instead of
+ # /dev/drbd/by-res/.
+ expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME)
+ util.wait_for_path(expected_device_path, 5)
+
+ device_realpath = os.path.realpath(expected_device_path)
+ if current_device_path != device_realpath:
+ raise LinstorVolumeManagerError(
+ 'Invalid path, current={}, expected={} (realpath={})'
+ .format(
+ current_device_path,
+ expected_device_path,
+ device_realpath
+ )
+ )
+
+ try:
+ util.pread2([DATABASE_MKFS, expected_device_path])
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Failed to execute {} on database volume: {}'
+ .format(DATABASE_MKFS, e)
+ )
+
+ return expected_device_path
+
+ @classmethod
+ def _destroy_database_volume(cls, lin, group_name):
+ error_str = cls._get_error_str(
+ lin.resource_dfn_delete(DATABASE_VOLUME_NAME)
+ )
+ if error_str:
+ raise LinstorVolumeManagerError(
+ 'Could not destroy resource `{}` from SR `{}`: {}'
+ .format(DATABASE_VOLUME_NAME, group_name, error_str)
)
+ @classmethod
+ def _mount_database_volume(cls, volume_path, mount=True, force=False):
+ backup_path = DATABASE_PATH + '-' + str(uuid.uuid4())
+
+ try:
+ # 1. Create a backup config folder.
+ database_not_empty = bool(os.listdir(DATABASE_PATH))
+ if database_not_empty:
+ try:
+ os.mkdir(backup_path)
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Failed to create backup path {} of LINSTOR config: {}'
+ .format(backup_path, e)
+ )
+
+ # 2. Move the config in the mounted volume.
+ if database_not_empty:
+ cls._move_files(DATABASE_PATH, backup_path)
+
+ cls._mount_volume(volume_path, DATABASE_PATH, mount)
+
+ if database_not_empty:
+ cls._move_files(backup_path, DATABASE_PATH, force)
+
+ # 3. Remove useless backup directory.
+ try:
+ os.rmdir(backup_path)
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Failed to remove backup path {} of LINSTOR config: {}'
+ .format(backup_path, e)
+ )
+ except Exception as e:
+ def force_exec(fn):
+ try:
+ fn()
+ except Exception:
+ pass
+
+ if mount == cls._is_mounted(DATABASE_PATH):
+ force_exec(lambda: cls._move_files(
+ DATABASE_PATH, backup_path
+ ))
+ force_exec(lambda: cls._mount_volume(
+ volume_path, DATABASE_PATH, not mount
+ ))
+
+ if mount != cls._is_mounted(DATABASE_PATH):
+ force_exec(lambda: cls._move_files(
+ backup_path, DATABASE_PATH
+ ))
+
+ force_exec(lambda: os.rmdir(backup_path))
+ raise e
+
+ @classmethod
+ def _force_destroy_database_volume(cls, lin, group_name):
+ try:
+ cls._destroy_database_volume(lin, group_name)
+ except Exception:
+ pass
+
+ @classmethod
+ def _destroy_storage_pool(cls, lin, group_name, node_name):
+ def destroy():
+ result = lin.storage_pool_delete(node_name, group_name)
+ errors = cls._filter_errors(result)
+ if cls._check_errors(errors, [
+ linstor.consts.FAIL_NOT_FOUND_STOR_POOL,
+ linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN
+ ]):
+ return
+
+ if errors:
+ raise LinstorVolumeManagerError(
+ 'Failed to destroy SP `{}` on node `{}`: {}'.format(
+ group_name,
+ node_name,
+ cls._get_error_str(errors)
+ )
+ )
+
+ # We must retry to avoid errors like:
+ # "can not be deleted as volumes / snapshot-volumes are still using it"
+ # after LINSTOR database volume destruction.
+ return util.retry(destroy, maxretry=10)
+
+ @classmethod
+ def _destroy_resource_group(cls, lin, group_name):
+ def destroy():
+ result = lin.resource_group_delete(group_name)
+ errors = cls._filter_errors(result)
+ if cls._check_errors(errors, [
+ linstor.consts.FAIL_NOT_FOUND_RSC_GRP
+ ]):
+ return
+
+ if errors:
+ raise LinstorVolumeManagerError(
+ 'Failed to destroy RG `{}`: {}'
+ .format(group_name, cls._get_error_str(errors))
+ )
+
+ return util.retry(destroy, maxretry=10)
+
@classmethod
def _build_group_name(cls, base_name):
# If thin provisioning is used we have a path like this:
# `VG/LV`. "/" is not accepted by LINSTOR.
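# Example (assuming PREFIX_SR is 'xcp-sr-'): a thin-provisioned device config
# such as `linstor_group/thin_device` becomes the LINSTOR group name
# `xcp-sr-linstor_group_thin_device`.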
return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_'))
+ @classmethod
+ def _check_volume_creation_errors(cls, result, volume_uuid, group_name):
+ errors = cls._filter_errors(result)
+ if cls._check_errors(errors, [
+ linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN
+ ]):
+ raise LinstorVolumeManagerError(
+ 'Failed to create volume `{}` from SR `{}`, it already exists'
+ .format(volume_uuid, group_name),
+ LinstorVolumeManagerError.ERR_VOLUME_EXISTS
+ )
+
+ if errors:
+ raise LinstorVolumeManagerError(
+ 'Failed to create volume `{}` from SR `{}`: {}'.format(
+ volume_uuid,
+ group_name,
+ cls._get_error_str(errors)
+ )
+ )
+
+ @classmethod
+ def _move_files(cls, src_dir, dest_dir, force=False):
+ def listdir(dir):
+ ignored = ['lost+found']
+ return [file for file in os.listdir(dir) if file not in ignored]
+
+ try:
+ if not force:
+ files = listdir(dest_dir)
+ if files:
+ raise LinstorVolumeManagerError(
+ 'Cannot move files from {} to {} because destination '
+ 'contains: {}'.format(src_dir, dest_dir, files)
+ )
+ except LinstorVolumeManagerError:
+ raise
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Cannot list dir {}: {}'.format(dest_dir, e)
+ )
+
+ try:
+ for file in listdir(src_dir):
+ try:
+ dest_file = os.path.join(dest_dir, file)
+ if not force and os.path.exists(dest_file):
+ raise LinstorVolumeManagerError(
+ 'Cannot move {} because it already exists in the '
+ 'destination'.format(file)
+ )
+ shutil.move(os.path.join(src_dir, file), dest_file)
+ except LinstorVolumeManagerError:
+ raise
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Cannot move {}: {}'.format(file, e)
+ )
+ except Exception as e:
+ if not force:
+ try:
+ cls._move_files(dest_dir, src_dir, force=True)
+ except Exception:
+ pass
+
+ raise LinstorVolumeManagerError(
+ 'Failed to move files from {} to {}: {}'.format(
+ src_dir, dest_dir, e
+ )
+ )
+
@staticmethod
def _get_filtered_properties(properties):
return dict(properties.items())
@@ -1711,3 +2879,110 @@ def _check_errors(result, codes):
if err.is_error(code):
return True
return False
+
+ @classmethod
+ def _controller_is_running(cls):
+ return cls._service_is_running('linstor-controller')
+
+ @classmethod
+ def _start_controller(cls, start=True):
+ return cls._start_service('linstor-controller', start)
+
+ @staticmethod
+ def _start_service(name, start=True):
+ action = 'start' if start else 'stop'
+ (ret, out, err) = util.doexec([
+ 'systemctl', action, name
+ ])
+ if ret != 0:
+ raise LinstorVolumeManagerError(
+ 'Failed to {} {}: {} {}'
+ .format(action, name, out, err)
+ )
+
+ @staticmethod
+ def _service_is_running(name):
+ (ret, out, err) = util.doexec([
+ 'systemctl', 'is-active', '--quiet', name
+ ])
+ return not ret
+
+ @staticmethod
+ def _is_mounted(mountpoint):
+ (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint])
+ return ret == 0
+
+ @classmethod
+ def _mount_volume(cls, volume_path, mountpoint, mount=True):
+ if mount:
+ try:
+ util.pread(['mount', volume_path, mountpoint])
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Failed to mount volume {} on {}: {}'
+ .format(volume_path, mountpoint, e)
+ )
+ else:
+ try:
+ if cls._is_mounted(mountpoint):
+ util.pread(['umount', mountpoint])
+ except Exception as e:
+ raise LinstorVolumeManagerError(
+ 'Failed to umount volume {} on {}: {}'
+ .format(volume_path, mountpoint, e)
+ )
+
+
+# ==============================================================================
+
+# Check if a path is a DRBD resource and log the process name/pid
+# that opened it.
+def log_drbd_openers(path):
+ # Ignore if it's not a symlink to DRBD resource.
+ if not path.startswith(DRBD_BY_RES_PATH):
+ return
+
+ # Compute resource name.
+ res_name_end = path.find('/', len(DRBD_BY_RES_PATH))
+ if res_name_end == -1:
+ return
+ res_name = path[len(DRBD_BY_RES_PATH):res_name_end]
+
+ volume_end = path.rfind('/')
+ if volume_end == res_name_end:
+ return
+ volume = path[volume_end + 1:]
+
+ try:
+ # Ensure path is a DRBD.
+ drbd_path = os.path.realpath(path)
+ stats = os.stat(drbd_path)
+ if not stat.S_ISBLK(stats.st_mode) or os.major(stats.st_rdev) != 147:
+ return
+
+ # Find where the device is open.
+ (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name])
+ if ret != 0:
+ util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format(
+ res_name, stderr
+ ))
+ return
+
+ # Is it a local device?
+ if stdout.startswith('{} role:Primary'.format(res_name)):
+ util.SMlog(
+ 'DRBD resource `{}` is open on local host: {}'
+ .format(path, get_local_volume_openers(res_name, volume))
+ )
+ return
+
+ # Is it a remote device?
+ util.SMlog(
+ 'DRBD resource `{}` is open on hosts: {}'
+ .format(path, get_all_volume_openers(res_name, volume))
+ )
+ except Exception as e:
+ util.SMlog(
+ 'Got exception while trying to determine where DRBD resource ' +
+ '`{}` is open: {}'.format(path, e)
+ )
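For reference, a minimal sketch (outside the patch) of how a `/dev/drbd/by-res/<resource>/<volume>` path decomposes into the resource name and volume index that `log_drbd_openers` works with; the constant value and helper name are illustrative assumptions, and the example path is hypothetical:

    DRBD_BY_RES_PATH = '/dev/drbd/by-res/'  # assumed value of the constant used above

    def split_by_res_path(path):
        # Return (resource_name, volume) or None for non by-res paths.
        if not path.startswith(DRBD_BY_RES_PATH):
            return None
        parts = path[len(DRBD_BY_RES_PATH):].split('/')
        if len(parts) != 2 or not all(parts):
            return None
        return parts[0], parts[1]

    # split_by_res_path('/dev/drbd/by-res/xcp-volume-example/0') == ('xcp-volume-example', '0')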
diff --git a/drivers/on_slave.py b/drivers/on_slave.py
index bb3f5db65..524424f61 100755
--- a/drivers/on_slave.py
+++ b/drivers/on_slave.py
@@ -79,6 +79,7 @@ def _is_open(session, args):
import CephFSSR
import EXTSR
import GlusterFSSR
+ import LinstorSR
import LVHDSR
import MooseFSSR
import NFSSR
@@ -109,8 +110,28 @@ def _is_open(session, args):
}
cmd.params = {"command": None}
+ sr_uuid = srRec["uuid"]
+
+ # Another ugly piece of code to load a real Linstor SR, otherwise
+ # we can't fetch the VDI path.
+ if srType == 'linstor':
+ host_ref = util.get_this_host_ref(session)
+ sr_ref = session.xenapi.SR.get_by_uuid(sr_uuid)
+
+ pbd = util.find_my_pbd(session, host_ref, sr_ref)
+ if pbd is None:
+ raise util.SMException('Failed to find Linstor PBD')
+
+ cmd.dconf = session.xenapi.PBD.get_device_config(pbd)
+
driver = SR.driver(srType)
- sr = driver(cmd, srRec["uuid"])
+ sr = driver(cmd, sr_uuid)
+
+ # The session_ref param is normally required to get a valid session when the SR object is created.
+ # That's not the case here, so attach the current session object to make LinstorSR happy.
+ if srType == 'linstor':
+ sr.session = session
+
vdi = sr.vdi(vdiUuid)
tapdisk = blktap2.Tapdisk.find_by_path(vdi.path)
util.SMlog("Tapdisk for %s: %s" % (vdi.path, tapdisk))
diff --git a/drivers/tapdisk-pause b/drivers/tapdisk-pause
index 932fc3ca6..75328757b 100755
--- a/drivers/tapdisk-pause
+++ b/drivers/tapdisk-pause
@@ -30,7 +30,7 @@ import vhdutil
import lvmcache
try:
- from linstorvolumemanager import LinstorVolumeManager
+ from linstorvolumemanager import get_controller_uri, LinstorVolumeManager
LINSTOR_AVAILABLE = True
except ImportError:
LINSTOR_AVAILABLE = False
@@ -152,10 +152,6 @@ class Tapdisk:
# "B" path. Note: "A", "B" and "OLD_A" are UUIDs.
session = self.session
- linstor_uri = 'linstor://{}'.format(
- util.get_master_rec(session)['address']
- )
-
host_ref = util.get_this_host_ref(session)
sr_ref = session.xenapi.SR.get_by_uuid(self.sr_uuid)
@@ -167,7 +163,7 @@ class Tapdisk:
group_name = dconf['group-name']
device_path = LinstorVolumeManager(
- linstor_uri,
+ get_controller_uri(),
group_name,
logger=util.SMlog
).get_device_path(self.vdi_uuid)
diff --git a/drivers/util.py b/drivers/util.py
index e73482288..376aaf703 100755
--- a/drivers/util.py
+++ b/drivers/util.py
@@ -699,32 +699,10 @@ def get_master_ref(session):
return session.xenapi.pool.get_master(pools[0])
-def get_master_rec(session):
- return session.xenapi.host.get_record(get_master_ref(session))
-
-
def is_master(session):
return get_this_host_ref(session) == get_master_ref(session)
-def get_master_address():
- address = None
- try:
- fd = open('/etc/xensource/pool.conf', 'r')
- try:
- items = fd.readline().split(':')
- if items[0].strip() == 'master':
- address = 'localhost'
- else:
- address = items[1].strip()
- finally:
- fd.close()
- except Exception:
- pass
- return address
-
-
-
def get_localhost_ref(session):
filename = '/etc/xensource-inventory'
try:
@@ -765,6 +743,17 @@ def get_hosts_attached_on(session, vdi_uuids):
host_refs[key[len('host_'):]] = True
return host_refs.keys()
+def get_this_host_address(session):
+ host_uuid = get_this_host()
+ host_ref = session.xenapi.host.get_by_uuid(host_uuid)
+ return session.xenapi.host.get_record(host_ref)['address']
+
+def get_host_addresses(session):
+ addresses = []
+ hosts = session.xenapi.host.get_all_records()
+ for record in hosts.values():
+ addresses.append(record['address'])
+ return addresses
def get_this_host_ref(session):
host_uuid = get_this_host()
@@ -1955,3 +1944,95 @@ def sessions_less_than_targets(other_config, device_config):
return (sessions < targets)
else:
return False
+
+
+def enable_and_start_service(name, start):
+ attempt = 0
+ while True:
+ attempt += 1
+ fn = 'enable' if start else 'disable'
+ args = ('systemctl', fn, '--now', name)
+ (ret, out, err) = doexec(args)
+ if ret == 0:
+ return
+ elif attempt >= 3:
+ raise Exception(
+ 'Failed to {} {}: {} {}'.format(fn, name, out, err)
+ )
+ time.sleep(1)
+
+
+def stop_service(name):
+ args = ('systemctl', 'stop', name)
+ (ret, out, err) = doexec(args)
+ if ret == 0:
+ return
+ raise Exception('Failed to stop {}: {} {}'.format(name, out, err))
+
+
+def restart_service(name):
+ attempt = 0
+ while True:
+ attempt += 1
+ SMlog('Restarting service {} {}...'.format(name, attempt))
+ args = ('systemctl', 'restart', name)
+ (ret, out, err) = doexec(args)
+ if ret == 0:
+ return
+ elif attempt >= 3:
+ SMlog('Restart service FAILED {} {}'.format(name, attempt))
+ raise Exception(
+ 'Failed to restart {}: {} {}'.format(name, out, err)
+ )
+ time.sleep(1)
+
+
+def check_pid_exists(pid):
+ try:
+ os.kill(pid, 0)
+ except OSError:
+ return False
+ else:
+ return True
+
+
+def make_profile(name, function):
+ """
+ Helper to execute cProfile using unique log file.
+ """
+
+ import cProfile
+ import itertools
+ import os.path
+ import time
+
+ assert name
+ assert function
+
+ FOLDER = '/tmp/sm-perfs/'
+ makedirs(FOLDER)
+
+ filename = time.strftime('{}_%Y%m%d_%H%M%S.prof'.format(name))
+
+ def gen_path(path):
+ yield path
+ root, ext = os.path.splitext(path)
+ for i in itertools.count(start=1, step=1):
+ yield root + '.{}'.format(i) + ext
+
+ for profile_path in gen_path(FOLDER + filename):
+ try:
+ file = open_atomic(profile_path, 'w')
+ file.close()
+ break
+ except OSError as e:
+ if e.errno == errno.EEXIST:
+ pass
+ else:
+ raise
+
+ try:
+ SMlog('* Start profiling of {} ({}) *'.format(name, filename))
+ cProfile.runctx('function()', None, locals(), profile_path)
+ finally:
+ SMlog('* End profiling of {} ({}) *'.format(name, filename))
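For reference, a minimal usage sketch for `make_profile`; the profiled function below is a hypothetical placeholder:

    import util

    def sr_scan_work():
        # Placeholder for the real code path to profile.
        pass

    # Writes a unique profile file under /tmp/sm-perfs/ (e.g. scan_YYYYmmdd_HHMMSS.prof).
    util.make_profile('scan', sr_scan_work)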
diff --git a/drivers/vhdutil.py b/drivers/vhdutil.py
index 3a3027e70..279786ea7 100755
--- a/drivers/vhdutil.py
+++ b/drivers/vhdutil.py
@@ -93,13 +93,16 @@ def ioretry(cmd, text=True):
errlist=[errno.EIO, errno.EAGAIN])
-def getVHDInfo(path, extractUuidFunction, includeParent=True):
+def getVHDInfo(path, extractUuidFunction, includeParent=True, resolveParent=True):
"""Get the VHD info. The parent info may optionally be omitted: vhd-util
tries to verify the parent by opening it, which results in error if the VHD
resides on an inactive LV"""
opts = "-vsf"
if includeParent:
opts += "p"
+ if not resolveParent:
+ opts += "u"
+
cmd = [VHD_UTIL, "query", OPT_LOG_ERR, opts, "-n", path]
ret = ioretry(cmd)
fields = ret.strip().split('\n')
diff --git a/etc/systemd/system/linstor-satellite.service.d/override.conf b/etc/systemd/system/linstor-satellite.service.d/override.conf
new file mode 100644
index 000000000..b1686b4f3
--- /dev/null
+++ b/etc/systemd/system/linstor-satellite.service.d/override.conf
@@ -0,0 +1,5 @@
+[Service]
+Environment=LS_KEEP_RES=^xcp-persistent*
+
+[Unit]
+After=drbd.service
diff --git a/etc/systemd/system/var-lib-linstor.service b/etc/systemd/system/var-lib-linstor.service
new file mode 100644
index 000000000..e9deb9042
--- /dev/null
+++ b/etc/systemd/system/var-lib-linstor.service
@@ -0,0 +1,21 @@
+# Because of the systemd version (v219) currently used in XCP-ng, we can't use
+# the ReadWriteOnly option (which applies the -w flag; it's not the same as -o rw).
+# This file is a workaround to avoid a read-only mount. It should be replaced with
+# the mount unit below once systemd >= 246 is available.
+#
+# [Unit]
+# Description=Filesystem for the LINSTOR controller
+#
+# [Mount]
+# What=/dev/drbd/by-res/xcp-persistent-database/0
+# Where=/var/lib/linstor
+# ReadWriteOnly=true
+
+[Unit]
+Description=Mount filesystem for the LINSTOR controller
+
+[Service]
+Type=oneshot
+ExecStart=/bin/mount -w /dev/drbd/by-res/xcp-persistent-database/0 /var/lib/linstor
+ExecStop=/opt/xensource/libexec/safe-umount /var/lib/linstor
+RemainAfterExit=true
diff --git a/linstor/linstor-monitord.c b/linstor/linstor-monitord.c
index 8161813d7..47740598c 100644
--- a/linstor/linstor-monitord.c
+++ b/linstor/linstor-monitord.c
@@ -14,8 +14,10 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include
#include
#include
+#include
#include
#include
#include
@@ -39,7 +41,8 @@
#define POOL_CONF_ABS_FILE POOL_CONF_DIR "/" POOL_CONF_FILE
// In milliseconds.
-#define POLL_TIMEOUT 2000
+#define UPDATE_LINSTOR_NODE_TIMEOUT 2000
+#define SR_SCAN_TIMEOUT 720000
// -----------------------------------------------------------------------------
@@ -130,24 +133,120 @@ static inline int isMasterHost (int *error) {
typedef struct {
int inotifyFd;
+ struct timespec lastScanTime;
+ int isMaster;
// TODO: Should be completed with at least a hostname field.
} State;
// -----------------------------------------------------------------------------
-static inline int execCommand (char *argv[]) {
+typedef struct {
+ char *data;
+ size_t size;
+ size_t capacity;
+} Buffer;
+
+#define max(a, b) ({ \
+ __typeof__(a) _a = (a); \
+ __typeof__(b) _b = (b); \
+ _a > _b ? _a : _b; \
+})
+
+static inline ssize_t readAll (int fd, Buffer *buffer) {
+ assert(buffer->capacity >= buffer->size);
+
+ ssize_t ret = 0;
+ do {
+ size_t byteCount = buffer->capacity - buffer->size;
+ if (byteCount < 16) {
+ const size_t newCapacity = max(buffer->capacity << 1, 64);
+ char *p = realloc(buffer->data, newCapacity);
+ if (!p)
+ return -errno;
+
+ buffer->data = p;
+ buffer->capacity = newCapacity;
+
+ byteCount = buffer->capacity - buffer->size;
+ }
+
+ ret = read(fd, buffer->data + buffer->size, byteCount);
+ if (ret > 0)
+ buffer->size += ret;
+ else if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
+ ret = 0;
+ } while (ret > 0);
+
+ return ret;
+}
+
+// -----------------------------------------------------------------------------
+
+static inline int execCommand (char *argv[], Buffer *buffer) {
+ int pipefd[2];
+ if (buffer) {
+ if (pipe(pipefd) < 0) {
+ syslog(LOG_ERR, "Failed to exec pipe: `%s`.", strerror(errno));
+ return -errno;
+ }
+
+ if (fcntl(pipefd[0], F_SETFL, O_NONBLOCK) < 0) {
+ syslog(LOG_ERR, "Failed to exec fcntl on pipe in: `%s`.", strerror(errno));
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return -errno;
+ }
+ }
+
const pid_t pid = fork();
- if (pid < 0)
+ if (pid < 0) {
+ syslog(LOG_ERR, "Failed to fork: `%s`.", strerror(errno));
+ if (buffer) {
+ close(pipefd[0]);
+ close(pipefd[1]);
+ }
return -errno;
+ }
// Child process.
if (pid == 0) {
+ if (buffer) {
+ close(STDOUT_FILENO);
+ dup(pipefd[1]);
+
+ close(pipefd[0]);
+ close(pipefd[1]);
+ }
+
if (execvp(*argv, argv) < 0)
syslog(LOG_ERR, "Failed to exec `%s` command.", *argv);
exit(EXIT_FAILURE);
}
// Main process.
+ int ret = 0;
+ if (buffer) {
+ close(pipefd[1]);
+
+ do {
+ struct pollfd fds = { pipefd[0], POLLIN | POLLHUP, 0 };
+ const int res = poll(&fds, 1, 0);
+ if (res < 0) {
+ if (errno == EAGAIN)
+ continue;
+ syslog(LOG_ERR, "Failed to poll from command: `%s`.", strerror(errno));
+ ret = -errno;
+ } else if (res > 0) {
+ if (fds.revents & POLLIN)
+ ret = readAll(pipefd[0], buffer);
+ if (fds.revents & POLLHUP)
+ break; // Input has been closed.
+ }
+ } while (ret >= 0);
+
+ close(pipefd[0]);
+ }
+
int status;
if (waitpid(pid, &status, 0) < 0) {
syslog(LOG_ERR, "Failed to wait command: `%s`.", *argv);
@@ -163,7 +262,7 @@ static inline int execCommand (char *argv[]) {
} else if (WIFSIGNALED(status))
syslog(LOG_ERR, "`%s` terminated by signal %d.", *argv, WTERMSIG(status));
- return 0;
+ return ret;
}
// -----------------------------------------------------------------------------
@@ -188,23 +287,6 @@ static inline int addInotifyWatch (int inotifyFd, const char *filepath, uint32_t
// -----------------------------------------------------------------------------
-static inline int updateLinstorServices () {
- int error;
- const int isMaster = isMasterHost(&error);
- if (error)
- return error;
-
- syslog(LOG_INFO, "%s linstor-controller...", isMaster ? "Enabling" : "Disabling");
- char *argv[] = {
- "systemctl",
- isMaster ? "enable" : "disable",
- "--now",
- "linstor-controller",
- NULL
- };
- return execCommand(argv);
-}
-
static inline int updateLinstorNode (State *state) {
char buffer[256];
if (gethostname(buffer, sizeof buffer) == -1) {
@@ -219,14 +301,53 @@ static inline int updateLinstorNode (State *state) {
// -----------------------------------------------------------------------------
+#define UUID_PARAM "uuid="
+#define UUID_PARAM_LEN (sizeof(UUID_PARAM) - 1)
+#define UUID_LENGTH 36
+
+static inline void scanLinstorSr (const char *uuid) {
+ char uuidBuf[UUID_LENGTH + UUID_PARAM_LEN + 1] = UUID_PARAM;
+ strncpy(uuidBuf + UUID_PARAM_LEN, uuid, UUID_LENGTH);
+ uuidBuf[UUID_LENGTH + UUID_PARAM_LEN] = '\0';
+ execCommand((char *[]){ "xe", "sr-scan", uuidBuf, NULL }, NULL);
+}
+
+// Called to update the physical/virtual size used by LINSTOR SRs in XAPI DB.
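+// The `xe sr-list type=linstor --minimal` output parsed below is expected to be
+// a single line of comma-separated SR UUIDs (36 characters each), possibly
+// followed by trailing whitespace.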
+static inline int scanLinstorSrs () {
+ Buffer srs = {};
+ const int ret = execCommand((char *[]){ "xe", "sr-list", "type=linstor", "--minimal", NULL }, &srs);
+ if (ret) {
+ free(srs.data);
+ return ret;
+ }
+
+ const char *end = srs.data + srs.size;
+ char *pos = srs.data;
+ for (char *off; (off = memchr(pos, ',', end - pos)); pos = off + 1)
+ if (off - pos == UUID_LENGTH)
+ scanLinstorSr(pos);
+
+ if (end - pos >= UUID_LENGTH) {
+ for (--end; end - pos >= UUID_LENGTH && isspace(*end); --end) {}
+ if (isalnum(*end))
+ scanLinstorSr(pos);
+ }
+
+ free(srs.data);
+
+ return 0;
+}
+
+// -----------------------------------------------------------------------------
+
#define PROCESS_MODE_DEFAULT 0
#define PROCESS_MODE_WAIT_FILE_CREATION 1
static inline int waitForPoolConfCreation (State *state, int *wdFile);
-static inline int processPoolConfEvents (int inotifyFd, int wd, char **buffer, size_t *bufferSize, int mode, int *process) {
+static inline int processPoolConfEvents (State *state, int wd, char **buffer, size_t *bufferSize, int mode, int *process) {
size_t size = 0;
- if (ioctl(inotifyFd, FIONREAD, (char *)&size) == -1) {
+ if (ioctl(state->inotifyFd, FIONREAD, (char *)&size) == -1) {
syslog(LOG_ERR, "Failed to get buffer size from inotify descriptor: `%s`.", strerror(errno));
return -errno;
}
@@ -241,7 +362,7 @@ static inline int processPoolConfEvents (int inotifyFd, int wd, char **buffer, s
*bufferSize = size;
}
- if ((size = (size_t)read(inotifyFd, *buffer, size)) == (size_t)-1) {
+ if ((size = (size_t)read(state->inotifyFd, *buffer, size)) == (size_t)-1) {
syslog(LOG_ERR, "Failed to read buffer from inotify descriptor: `%s`.", strerror(errno));
return -errno;
}
@@ -280,10 +401,9 @@ static inline int processPoolConfEvents (int inotifyFd, int wd, char **buffer, s
syslog(LOG_INFO, "Updating linstor services... (Inotify mask=%" PRIu32 ")", mask);
if (mask & (IN_DELETE_SELF | IN_MOVE_SELF | IN_UNMOUNT)) {
syslog(LOG_ERR, "Watched `" POOL_CONF_ABS_FILE "` file has been removed!");
- inotify_rm_watch(inotifyFd, wd); // Do not forget to remove watch to avoid leaks.
+ inotify_rm_watch(state->inotifyFd, wd); // Do not forget to remove watch to avoid leaks.
return -EIO;
}
- ret = updateLinstorServices();
} else {
if (mask & (IN_CREATE | IN_MOVED_TO)) {
syslog(LOG_ERR, "Watched `" POOL_CONF_ABS_FILE "` file has been recreated!");
@@ -303,16 +423,24 @@ static inline int waitAndProcessEvents (State *state, int wd, int mode) {
struct timespec previousTime = getCurrentTime();
do {
- struct timespec currentTime = getCurrentTime();
+ const struct timespec currentTime = getCurrentTime();
const int64_t elapsedTime = convertToMilliseconds(getTimeDiff(¤tTime, &previousTime));
int timeout;
- if (elapsedTime >= POLL_TIMEOUT) {
+ if (elapsedTime >= UPDATE_LINSTOR_NODE_TIMEOUT) {
updateLinstorNode(state);
- timeout = POLL_TIMEOUT;
+ timeout = UPDATE_LINSTOR_NODE_TIMEOUT;
previousTime = getCurrentTime();
} else {
- timeout = POLL_TIMEOUT - elapsedTime;
+ timeout = UPDATE_LINSTOR_NODE_TIMEOUT - elapsedTime;
+ }
+
+ const int64_t elapsedScanTime = convertToMilliseconds(getTimeDiff(¤tTime, &state->lastScanTime));
+ if (elapsedScanTime >= SR_SCAN_TIMEOUT) {
+ state->isMaster = isMasterHost(&ret);
+ if (state->isMaster)
+ scanLinstorSrs();
+ state->lastScanTime = getCurrentTime();
}
struct pollfd fds = { state->inotifyFd, POLLIN, 0 };
@@ -323,7 +451,9 @@ static inline int waitAndProcessEvents (State *state, int wd, int mode) {
syslog(LOG_ERR, "Failed to poll from inotify descriptor: `%s`.", strerror(errno));
ret = -errno;
} else if (res > 0) {
- ret = processPoolConfEvents(state->inotifyFd, wd, &buffer, &bufferSize, mode, &process);
+ state->isMaster = isMasterHost(&ret);
+ if (!ret)
+ ret = processPoolConfEvents(state, wd, &buffer, &bufferSize, mode, &process);
}
} while (ret >= 0 && process);
@@ -350,7 +480,8 @@ static inline int waitForPoolConfCreation (State *state, int *wdFile) {
do {
do {
// Update LINSTOR services...
- ret = updateLinstorServices();
+ int ret;
+ state->isMaster = isMasterHost(&ret);
// Ok we can't read the pool configuration file.
// Maybe the file doesn't exist. Waiting its creation...
@@ -378,7 +509,9 @@ int main (int argc, char *argv[]) {
setlogmask(LOG_UPTO(LOG_INFO));
State state = {
- .inotifyFd = -1
+ .inotifyFd = -1,
+ .lastScanTime = getCurrentTime(),
+ .isMaster = 0
};
const int inotifyFd = createInotifyInstance();
diff --git a/scripts/fork-log-daemon b/scripts/fork-log-daemon
new file mode 100755
index 000000000..665a60baf
--- /dev/null
+++ b/scripts/fork-log-daemon
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+
+import select
+import signal
+import subprocess
+import sys
+import syslog
+
+def main():
+ process = subprocess.Popen(sys.argv[1:], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ signal.signal(signal.SIGTERM, signal.SIG_IGN)
+ write_to_stdout = True
+
+ while process.poll() is None:
+ while True:
+ output = process.stdout.readline()
+ if not output:
+ break
+
+ if write_to_stdout:
+ try:
+ print(output)
+ sys.stdout.flush()
+ except Exception:
+ # Probably a broken pipe. So the process reading stdout is dead.
+ write_to_stdout = False
+ syslog.syslog(output)
+
+if __name__ == "__main__":
+ syslog.openlog(ident=sys.argv[1], facility=syslog.LOG_DAEMON)
+ try:
+ main()
+ except Exception as e:
+ syslog.syslog(sys.argv[1] + ' terminated with exception: {}'.format(e))
+ finally:
+ syslog.syslog(sys.argv[1] + ' is now terminated!')
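For reference, a minimal sketch of how a caller might wrap a command with this helper so its output is mirrored to syslog; the wrapped command name and flag are hypothetical:

    import subprocess

    FORK_LOG_DAEMON = '/opt/xensource/libexec/fork-log-daemon'

    # fork-log-daemon executes sys.argv[1:] and uses sys.argv[1] as the syslog ident.
    proc = subprocess.Popen(
        [FORK_LOG_DAEMON, 'my-nbd-server', '--foreground'],
        stdout=subprocess.PIPE
    )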
diff --git a/scripts/linstor-kv-tool b/scripts/linstor-kv-tool
new file mode 100755
index 000000000..c9070270c
--- /dev/null
+++ b/scripts/linstor-kv-tool
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2022 Vates SAS
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import argparse
+import json
+import linstor
+
+
+def dump_kv(controller_uri, group_name, namespace):
+ kv = linstor.KV(
+ group_name,
+ uri=controller_uri,
+ namespace=namespace
+ )
+ print(json.dumps(kv, sort_keys=True, indent=2))
+
+
+def remove_volume(controller_uri, group_name, vdi_name):
+ assert vdi_name
+ kv = linstor.KV(
+ group_name,
+ uri=controller_uri,
+ namespace='/xcp/volume/{}'.format(vdi_name)
+ )
+
+ for key, value in list(kv.items()):
+ del kv[key]
+
+
+def remove_all_volumes(controller_uri, group_name):
+ kv = linstor.KV(
+ group_name,
+ uri=controller_uri,
+ namespace='/'
+ )
+
+ for key, value in list(kv.items()):
+ if key.startswith('xcp/volume/') or key.startswith('xcp/sr/journal/'):
+ size = key.rindex('/')
+ kv.namespace = key[:size]
+ del kv[key[size + 1:]]
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-u', '--uri', required=True)
+ parser.add_argument('-g', '--group-name', required=True)
+ parser.add_argument('-n', '--namespace', default='/')
+
+ action = parser.add_mutually_exclusive_group(required=True)
+ action.add_argument('--dump-volumes', action='store_true')
+ action.add_argument('--remove-volume', metavar='VDI_UUID')
+ action.add_argument('--remove-all-volumes', action='store_true')
+
+ args = parser.parse_args()
+ if args.dump_volumes:
+ dump_kv(args.uri, args.group_name, args.namespace)
+ elif args.remove_volume:
+ remove_volume(args.uri, args.group_name, args.remove_volume)
+ elif args.remove_all_volumes:
+ remove_all_volumes(args.uri, args.group_name)
+
+
+if __name__ == '__main__':
+ main()
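For reference, a usage sketch of the functions above; the controller URI, group name and VDI UUID are placeholders:

    # Dump every KV entry of an SR as JSON.
    dump_kv('linstor://192.0.2.10', 'xcp-sr-linstor_group_thin_device', '/')

    # Remove the KV entries of a single volume.
    remove_volume('linstor://192.0.2.10', 'xcp-sr-linstor_group_thin_device', '<VDI_UUID>')

The equivalent command line for the first call would be: linstor-kv-tool -u linstor://192.0.2.10 -g xcp-sr-linstor_group_thin_device --dump-volumes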
diff --git a/scripts/safe-umount b/scripts/safe-umount
new file mode 100755
index 000000000..9c1dcc400
--- /dev/null
+++ b/scripts/safe-umount
@@ -0,0 +1,39 @@
+#!/usr/bin/env python2
+
+import argparse
+import subprocess
+import sys
+import time
+
+
+def safe_umount(path):
+ retry_count = 10
+ not_mounted_str = 'umount: {}: not mounted'.format(path)
+
+ last_code = 0
+ while retry_count:
+ proc = subprocess.Popen(['mountpoint', '-q', path])
+ proc.wait()
+ if proc.returncode:
+ return 0
+
+ proc = subprocess.Popen(['umount', path], stderr=subprocess.PIPE)
+ (stdout, stderr) = proc.communicate()
+ if not proc.returncode:
+ return 0
+
+ error = stderr.strip()
+ if error == not_mounted_str:
+ return 0
+
+ retry_count -= 1
+ last_code = proc.returncode
+ time.sleep(0.500)
+ return last_code
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('path')
+ args = parser.parse_args()
+ sys.exit(safe_umount(args.path))
diff --git a/tests/test_on_slave.py b/tests/test_on_slave.py
index 1aad3639a..d6f57130a 100644
--- a/tests/test_on_slave.py
+++ b/tests/test_on_slave.py
@@ -13,7 +13,15 @@
class Test_on_slave_is_open(unittest.TestCase):
- MOCK_IMPORTS = ['SRCommand', 'SR', 'NFSSR', 'EXTSR', 'LVHDSR', 'blktap2']
+ MOCK_IMPORTS = [
+ 'SRCommand',
+ 'SR',
+ 'NFSSR',
+ 'EXTSR',
+ 'LVHDSR',
+ 'LinstorSR',
+ 'blktap2'
+ ]
def fake_import(self, name, *args):
print('Asked to import {}'.format(name))