From 05be5c967debe191beac09ea956a935b3689a144 Mon Sep 17 00:00:00 2001
From: Xiubo Li
Date: Fri, 21 Aug 2020 05:23:26 -0400
Subject: [PATCH] lock: do not try to acquire the lock for read fops

After the connection is lost or the lock has been broken by other nodes,
just skip acquiring the lock for read fops.

Fixes: https://tracker.ceph.com/issues/47076
Signed-off-by: Xiubo Li
---
 alua.c              | 53 ++++++++++++++++++++++++++++++---------
 alua.h              |  5 ++--
 rbd.c               |  4 +--
 tcmur_cmd_handler.c |  9 ++++---
 tcmur_device.c      | 60 +++++++++++++++++++++++++++++++--------------
 tcmur_device.h      |  8 +++---
 6 files changed, 99 insertions(+), 40 deletions(-)

diff --git a/alua.c b/alua.c
index dc129b1b..096b6462 100644
--- a/alua.c
+++ b/alua.c
@@ -433,7 +433,7 @@ static int alua_sync_state(struct tcmu_device *dev,
 	 * lock state to avoid later blacklist errors.
 	 */
 	pthread_mutex_lock(&rdev->state_lock);
-	if (rdev->lock_state == TCMUR_DEV_LOCK_LOCKED) {
+	if (rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKED) {
 		tcmu_dev_dbg(dev, "Dropping lock\n");
 		rdev->lock_state = TCMUR_DEV_LOCK_UNLOCKED;
 	}
@@ -551,7 +551,8 @@ static void alua_event_work_fn(void *arg)
 	tcmu_acquire_dev_lock(dev, -1);
 }
 
-int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
+int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd,
+			     bool is_read)
 {
 	struct tcmur_device *rdev = tcmu_dev_get_private(dev);
 	int ret = TCMU_STS_OK;
@@ -560,17 +561,47 @@ int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
 		return ret;
 
 	pthread_mutex_lock(&rdev->state_lock);
-	if (rdev->lock_state == TCMUR_DEV_LOCK_LOCKED) {
+	if (rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKED) {
+		/* This state is good for both reads and writes */
 		goto done;
-	} else if (rdev->lock_state == TCMUR_DEV_LOCK_LOCKING) {
-		tcmu_dev_dbg(dev, "Lock acquisition operation is already in process.\n");
+	} else if (rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKING) {
+		/* Both reads and writes should return busy in this state */
+		tcmu_dev_dbg(dev, "Write lock acquisition operation is already in process.\n");
 		ret = TCMU_STS_BUSY;
 		goto done;
-	}
+	} else if (rdev->lock_state == TCMUR_DEV_LOCK_READ_LOCKING) {
+		/* Both reads and writes need to retry */
+		tcmu_dev_dbg(dev, "Read lock acquisition operation is already in process.\n");
+		ret = TCMU_STS_BUSY;
+		goto done;
+	} else if (is_read) {
+		if (rdev->lock_state == TCMUR_DEV_LOCK_READ_LOCKED)
+			goto done;
 
-	tcmu_dev_info(dev, "Starting lock acquisition operation.\n");
+		tcmu_dev_info(dev, "Starting read lock acquisition operation.\n");
+
+		/*
+		 * Since we are here, the current lock state should be one of:
+		 * TCMUR_DEV_LOCK_UNLOCKED
+		 * TCMUR_DEV_LOCK_UNKNOWN
+		 *
+		 * Will not acquire the lock, just reopen the device.
+		 */
+		rdev->lock_state = TCMUR_DEV_LOCK_READ_LOCKING;
+	} else {
+		tcmu_dev_info(dev, "Starting write lock acquisition operation.\n");
 
-	rdev->lock_state = TCMUR_DEV_LOCK_LOCKING;
+		/*
+		 * Since we are here, the current lock state should be one of:
+		 * TCMUR_DEV_LOCK_UNLOCKED
+		 * TCMUR_DEV_LOCK_UNKNOWN
+		 * TCMUR_DEV_LOCK_READ_LOCKING
+		 * TCMUR_DEV_LOCK_READ_LOCKED
+		 *
+		 * We may reopen the device and will acquire the lock later.
+		 */
+		rdev->lock_state = TCMUR_DEV_LOCK_WRITE_LOCKING;
+	}
 
 	/*
 	 * The initiator is going to be queueing commands, so do this
@@ -760,17 +791,17 @@ int tcmu_emulate_set_tgt_port_grps(struct tcmu_device *dev,
 	return ret;
 }
 
-int alua_check_state(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
+int alua_check_state(struct tcmu_device *dev, struct tcmulib_cmd *cmd, bool is_read)
 {
 	struct tcmur_device *rdev = tcmu_dev_get_private(dev);
 
 	if (rdev->failover_type == TCMUR_DEV_FAILOVER_EXPLICIT) {
-		if (rdev->lock_state != TCMUR_DEV_LOCK_LOCKED) {
+		if (rdev->lock_state != TCMUR_DEV_LOCK_WRITE_LOCKED) {
 			tcmu_dev_dbg(dev, "device lock not held.\n");
 			return TCMU_STS_FENCED;
 		}
 	} else if (rdev->failover_type == TCMUR_DEV_FAILOVER_IMPLICIT) {
-		return alua_implicit_transition(dev, cmd);
+		return alua_implicit_transition(dev, cmd, is_read);
 	}
 
 	return TCMU_STS_OK;
diff --git a/alua.h b/alua.h
index 66f81ffe..51b13462 100644
--- a/alua.h
+++ b/alua.h
@@ -45,8 +45,9 @@ int tcmu_emulate_set_tgt_port_grps(struct tcmu_device *dev,
 struct tgt_port *tcmu_get_enabled_port(struct list_head *);
 int tcmu_get_alua_grps(struct tcmu_device *, struct list_head *);
 void tcmu_release_alua_grps(struct list_head *);
-int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd);
+int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd,
+			     bool is_read);
 bool lock_is_required(struct tcmu_device *dev);
-int alua_check_state(struct tcmu_device *dev, struct tcmulib_cmd *cmd);
+int alua_check_state(struct tcmu_device *dev, struct tcmulib_cmd *cmd, bool is_read);
 
 #endif
diff --git a/rbd.c b/rbd.c
index 887bd0b7..25b18bc8 100644
--- a/rbd.c
+++ b/rbd.c
@@ -156,7 +156,7 @@ static int tcmu_rbd_report_event(struct tcmu_device *dev)
 	 * update, and we do not want one event to overwrite the info.
 	 */
 	return tcmu_rbd_service_status_update(dev,
-			rdev->lock_state == TCMUR_DEV_LOCK_LOCKED ? true : false);
+			rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKED ? true : false);
 }
 
 static int tcmu_rbd_service_register(struct tcmu_device *dev)
@@ -677,7 +677,7 @@ static int tcmu_rbd_get_lock_state(struct tcmu_device *dev)
 
 	ret = tcmu_rbd_has_lock(dev);
 	if (ret == 1)
-		return TCMUR_DEV_LOCK_LOCKED;
+		return TCMUR_DEV_LOCK_WRITE_LOCKED;
 	else if (ret == 0 || ret == -ESHUTDOWN)
 		return TCMUR_DEV_LOCK_UNLOCKED;
 	else
diff --git a/tcmur_cmd_handler.c b/tcmur_cmd_handler.c
index 132c6682..ca5e21cc 100644
--- a/tcmur_cmd_handler.c
+++ b/tcmur_cmd_handler.c
@@ -776,7 +776,7 @@ int tcmur_handle_writesame(struct tcmu_device *dev, struct tcmur_cmd *tcmur_cmd,
 	if (tcmu_dev_in_recovery(dev))
 		return TCMU_STS_BUSY;
 
-	ret = alua_check_state(dev, cmd);
+	ret = alua_check_state(dev, cmd, false);
 	if (ret)
 		return ret;
 
@@ -1723,7 +1723,7 @@ int tcmur_handle_caw(struct tcmu_device *dev, struct tcmur_cmd *tcmur_cmd,
 		return TCMU_STS_OK;
 	}
 
-	ret = alua_check_state(dev, cmd);
+	ret = alua_check_state(dev, cmd, false);
 	if (ret)
 		return ret;
 
@@ -2118,6 +2118,7 @@ static int tcmur_cmd_handler(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
 	struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
 	struct tcmur_device *rdev = tcmu_dev_get_private(dev);
 	uint8_t *cdb = cmd->cdb;
+	bool is_read = false;
 
 	track_aio_request_start(rdev);
 
@@ -2128,10 +2129,12 @@ static int tcmur_cmd_handler(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
 
 	/* Don't perform alua implicit transition if command is not supported */
 	switch(cdb[0]) {
+	/* Skip grabbing the lock for reads */
 	case READ_6:
 	case READ_10:
 	case READ_12:
 	case READ_16:
+		is_read = true;
 	case WRITE_6:
 	case WRITE_10:
 	case WRITE_12:
@@ -2146,7 +2149,7 @@ static int tcmur_cmd_handler(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
 	case WRITE_SAME:
 	case WRITE_SAME_16:
 	case FORMAT_UNIT:
-		ret = alua_check_state(dev, cmd);
+		ret = alua_check_state(dev, cmd, is_read);
 		if (ret)
 			goto untrack;
 		break;
diff --git a/tcmur_device.c b/tcmur_device.c
index 462a9a8e..562e6950 100644
--- a/tcmur_device.c
+++ b/tcmur_device.c
@@ -60,15 +60,7 @@ int __tcmu_reopen_dev(struct tcmu_device *dev, int retries)
 	 */
 	tcmur_flush_work(rdev->event_work);
 
-	/*
-	 * Force a reacquisition of the lock when we have reopend the
-	 * device, so it can update state. If we are being called from
-	 * the lock code path then do not change state.
-	 */
 	pthread_mutex_lock(&rdev->state_lock);
-	if (rdev->lock_state != TCMUR_DEV_LOCK_LOCKING)
-		rdev->lock_state = TCMUR_DEV_LOCK_UNLOCKED;
-
 	if (rdev->flags & TCMUR_DEV_FLAG_IS_OPEN)
 		needs_close = true;
 	rdev->flags &= ~TCMUR_DEV_FLAG_IS_OPEN;
@@ -281,7 +273,7 @@ void tcmu_notify_lock_lost(struct tcmu_device *dev)
 	 * We could be getting stale IO completions. If we are trying to
 	 * reaquire the lock do not change state.
 	 */
-	if (rdev->lock_state != TCMUR_DEV_LOCK_LOCKING) {
+	if (rdev->lock_state != TCMUR_DEV_LOCK_WRITE_LOCKING) {
 		__tcmu_notify_lock_lost(dev);
 	}
 	pthread_mutex_unlock(&rdev->state_lock);
@@ -294,7 +286,7 @@ void tcmu_release_dev_lock(struct tcmu_device *dev)
 	int ret;
 
 	pthread_mutex_lock(&rdev->state_lock);
-	if (rdev->lock_state != TCMUR_DEV_LOCK_LOCKED) {
+	if (rdev->lock_state != TCMUR_DEV_LOCK_WRITE_LOCKED) {
 		pthread_mutex_unlock(&rdev->state_lock);
 		return;
 	}
@@ -411,11 +403,21 @@ int tcmu_acquire_dev_lock(struct tcmu_device *dev, uint16_t tag)
 		goto done;
 	}
 
+	/*
+	 * Since we are here the lock state must be one of:
+	 * for implicit:
+	 * TCMUR_DEV_LOCK_READ_LOCKING
+	 * TCMUR_DEV_LOCK_WRITE_LOCKING
+	 *
+	 * for explicit:
+	 * TCMUR_DEV_LOCK_UNLOCKED
+	 * TCMUR_DEV_LOCK_UNKNOWN
+	 */
+
 	reopen = false;
 	pthread_mutex_lock(&rdev->state_lock);
-	if (rdev->lock_lost || !(rdev->flags & TCMUR_DEV_FLAG_IS_OPEN)) {
+	if (rdev->lock_lost || !(rdev->flags & TCMUR_DEV_FLAG_IS_OPEN))
 		reopen = true;
-	}
 	pthread_mutex_unlock(&rdev->state_lock);
 
 retry:
@@ -434,6 +436,14 @@ int tcmu_acquire_dev_lock(struct tcmu_device *dev, uint16_t tag)
 		}
 	}
 
+	pthread_mutex_lock(&rdev->state_lock);
+	if (rdev->lock_state == TCMUR_DEV_LOCK_READ_LOCKING) {
+		pthread_mutex_unlock(&rdev->state_lock);
+		ret = TCMU_STS_OK;
+		goto done;
+	}
+	pthread_mutex_unlock(&rdev->state_lock);
+
 	ret = rhandler->lock(dev, tag);
 	if (ret == TCMU_STS_FENCED) {
 		if (retries < 1) {
@@ -467,13 +477,25 @@ int tcmu_acquire_dev_lock(struct tcmu_device *dev, uint16_t tag)
 	/* TODO: set UA based on bgly's patches */
 
 	pthread_mutex_lock(&rdev->state_lock);
-	if (ret == TCMU_STS_OK)
-		rdev->lock_state = TCMUR_DEV_LOCK_LOCKED;
-	else
+	if (ret != TCMU_STS_OK) {
 		rdev->lock_state = TCMUR_DEV_LOCK_UNLOCKED;
+		tcmu_dev_info(dev, "Lock acquisition unsuccessful\n");
+	} else {
+		if (rdev->lock_state == TCMUR_DEV_LOCK_READ_LOCKING) {
+			rdev->lock_state = TCMUR_DEV_LOCK_READ_LOCKED;
+			tcmu_dev_info(dev, "Read lock acquisition successful\n");
+		} else if (rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKING) {
+			rdev->lock_state = TCMUR_DEV_LOCK_WRITE_LOCKED;
+			tcmu_dev_info(dev, "Write lock acquisition successful\n");
+		} else {
+			/*
+			 * For explicit transition it will always acquire the write lock.
+			 */
+			rdev->lock_state = TCMUR_DEV_LOCK_WRITE_LOCKED;
+			tcmu_dev_info(dev, "Write lock acquisition successful\n");
+		}
+	}
 
-	tcmu_dev_info(dev, "Lock acquisition %s\n",
-		      rdev->lock == TCMUR_DEV_LOCK_LOCKED ? "successful" : "unsuccessful");
 	tcmu_cfgfs_dev_exec_action(dev, "block_dev", 0);
 	pthread_mutex_unlock(&rdev->state_lock);
 
@@ -501,8 +523,8 @@ void tcmu_update_dev_lock_state(struct tcmu_device *dev)
 	state = rhandler->get_lock_state(dev);
 	pthread_mutex_lock(&rdev->state_lock);
 check_state:
-	if (rdev->lock_state == TCMUR_DEV_LOCK_LOCKED &&
-	    state != TCMUR_DEV_LOCK_LOCKED) {
+	if (rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKED &&
+	    state != TCMUR_DEV_LOCK_WRITE_LOCKED) {
 		tcmu_dev_dbg(dev, "Updated out of sync lock state.\n");
 		__tcmu_notify_lock_lost(dev);
 	}
diff --git a/tcmur_device.h b/tcmur_device.h
index 8184887a..62151dad 100644
--- a/tcmur_device.h
+++ b/tcmur_device.h
@@ -32,10 +32,12 @@ enum {
 };
 
 enum {
-	TCMUR_DEV_LOCK_UNLOCKED,
-	TCMUR_DEV_LOCK_LOCKED,
-	TCMUR_DEV_LOCK_LOCKING,
 	TCMUR_DEV_LOCK_UNKNOWN,
+	TCMUR_DEV_LOCK_UNLOCKED,
+	TCMUR_DEV_LOCK_READ_LOCKING,
+	TCMUR_DEV_LOCK_READ_LOCKED,
+	TCMUR_DEV_LOCK_WRITE_LOCKING,
+	TCMUR_DEV_LOCK_WRITE_LOCKED,
 };
 
 struct tcmur_work;
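
Illustration (not part of the patch): a minimal, self-contained sketch of the read/write
lock state machine the patch introduces. The enum values mirror the new TCMUR_DEV_LOCK_*
states, but the helper, its return codes, and main() below are simplified stand-ins rather
than tcmu-runner code. Reads only trigger a device reopen (READ_LOCKING -> READ_LOCKED),
writes go on to take the exclusive lock (WRITE_LOCKING -> WRITE_LOCKED), and any transition
already in flight reports busy, matching the TCMU_STS_BUSY returns in alua_implicit_transition().

/* Sketch only: simplified model of the new read/write lock states. */
#include <stdbool.h>
#include <stdio.h>

enum lock_state {
	LOCK_UNKNOWN,
	LOCK_UNLOCKED,
	LOCK_READ_LOCKING,
	LOCK_READ_LOCKED,
	LOCK_WRITE_LOCKING,
	LOCK_WRITE_LOCKED,
};

enum { STS_OK, STS_BUSY };

/*
 * Decide how to handle a command given the current lock state: reads never
 * take the exclusive lock, they only wait for the device to be reopened.
 */
static int implicit_transition(enum lock_state *state, bool is_read)
{
	if (*state == LOCK_WRITE_LOCKED)
		return STS_OK;		/* good for both reads and writes */
	if (*state == LOCK_WRITE_LOCKING || *state == LOCK_READ_LOCKING)
		return STS_BUSY;	/* acquisition already in progress */

	if (is_read) {
		if (*state == LOCK_READ_LOCKED)
			return STS_OK;
		*state = LOCK_READ_LOCKING;	/* reopen only, no exclusive lock */
	} else {
		*state = LOCK_WRITE_LOCKING;	/* reopen, then acquire the lock */
	}
	return STS_BUSY;	/* background work completes the transition */
}

int main(void)
{
	enum lock_state s = LOCK_UNLOCKED;

	printf("read:  %s\n", implicit_transition(&s, true) == STS_OK ? "ok" : "busy");
	s = LOCK_READ_LOCKED;
	printf("read:  %s\n", implicit_transition(&s, true) == STS_OK ? "ok" : "busy");
	printf("write: %s\n", implicit_transition(&s, false) == STS_OK ? "ok" : "busy");
	return 0;
}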