Skip to content

Commit

Permalink
lock: do not try to acquire the lock for read fops
Browse files Browse the repository at this point in the history
After the connection was lost or the lock was broke by other nodes,
and for the read fops just skip acquiring the lock.

Fixes: https://tracker.ceph.com/issues/47076
Signed-off-by: Xiubo Li <[email protected]>
  • Loading branch information
lxbsz committed Aug 28, 2020
1 parent ebffeff commit 05be5c9
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 40 deletions.
53 changes: 42 additions & 11 deletions alua.c
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ static int alua_sync_state(struct tcmu_device *dev,
* lock state to avoid later blacklist errors.
*/
pthread_mutex_lock(&rdev->state_lock);
if (rdev->lock_state == TCMUR_DEV_LOCK_LOCKED) {
if (rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKED) {
tcmu_dev_dbg(dev, "Dropping lock\n");
rdev->lock_state = TCMUR_DEV_LOCK_UNLOCKED;
}
Expand Down Expand Up @@ -551,7 +551,8 @@ static void alua_event_work_fn(void *arg)
tcmu_acquire_dev_lock(dev, -1);
}

int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd,
bool is_read)
{
struct tcmur_device *rdev = tcmu_dev_get_private(dev);
int ret = TCMU_STS_OK;
Expand All @@ -560,17 +561,47 @@ int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
return ret;

pthread_mutex_lock(&rdev->state_lock);
if (rdev->lock_state == TCMUR_DEV_LOCK_LOCKED) {
if (rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKED) {
/* For both read/write cases in this state is good */
goto done;
} else if (rdev->lock_state == TCMUR_DEV_LOCK_LOCKING) {
tcmu_dev_dbg(dev, "Lock acquisition operation is already in process.\n");
} else if (rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKING) {
/* For both read/write cases in this state should return busy */
tcmu_dev_dbg(dev, "Write lock acquisition operation is already in process.\n");
ret = TCMU_STS_BUSY;
goto done;
}
} else if (rdev->lock_state == TCMUR_DEV_LOCK_READ_LOCKING) {
/* For both read/write they need to retry */
tcmu_dev_dbg(dev, "Read lock acquisition operation is already in process.\n");
ret = TCMU_STS_BUSY;
goto done;
} else if (is_read) {
if (rdev->lock_state == TCMUR_DEV_LOCK_READ_LOCKED)
goto done;

tcmu_dev_info(dev, "Starting lock acquisition operation.\n");
tcmu_dev_info(dev, "Starting read lock acquisition operation.\n");

/*
* Since we are here, current lock state should be one of:
* TCMUR_DEV_LOCK_UNLOCKED
* TCMUR_DEV_LOCK_UNKNOWN
*
* Will not acquire the lock, just reopen the device.
*/
rdev->lock_state = TCMUR_DEV_LOCK_READ_LOCKING;
} else {
tcmu_dev_info(dev, "Starting write lock acquisition operation.\n");

rdev->lock_state = TCMUR_DEV_LOCK_LOCKING;
/*
* Since we are here, current lock state should be one of:
* TCMUR_DEV_LOCK_UNLOCKED
* TCMUR_DEV_LOCK_UNKNOWN
* TCMUR_DEV_LOCK_READ_LOCKING
* TCMUR_DEV_LOCK_READ_LOCKED
*
* May will reopen the deivce and Will acquire the lock later.
*/
rdev->lock_state = TCMUR_DEV_LOCK_WRITE_LOCKING;
}

/*
* The initiator is going to be queueing commands, so do this
Expand Down Expand Up @@ -760,17 +791,17 @@ int tcmu_emulate_set_tgt_port_grps(struct tcmu_device *dev,
return ret;
}

int alua_check_state(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
int alua_check_state(struct tcmu_device *dev, struct tcmulib_cmd *cmd, bool is_read)
{
struct tcmur_device *rdev = tcmu_dev_get_private(dev);

if (rdev->failover_type == TCMUR_DEV_FAILOVER_EXPLICIT) {
if (rdev->lock_state != TCMUR_DEV_LOCK_LOCKED) {
if (rdev->lock_state != TCMUR_DEV_LOCK_WRITE_LOCKED) {
tcmu_dev_dbg(dev, "device lock not held.\n");
return TCMU_STS_FENCED;
}
} else if (rdev->failover_type == TCMUR_DEV_FAILOVER_IMPLICIT) {
return alua_implicit_transition(dev, cmd);
return alua_implicit_transition(dev, cmd, is_read);
}

return TCMU_STS_OK;
Expand Down
5 changes: 3 additions & 2 deletions alua.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,9 @@ int tcmu_emulate_set_tgt_port_grps(struct tcmu_device *dev,
struct tgt_port *tcmu_get_enabled_port(struct list_head *);
int tcmu_get_alua_grps(struct tcmu_device *, struct list_head *);
void tcmu_release_alua_grps(struct list_head *);
int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd);
int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd,
bool is_read);
bool lock_is_required(struct tcmu_device *dev);
int alua_check_state(struct tcmu_device *dev, struct tcmulib_cmd *cmd);
int alua_check_state(struct tcmu_device *dev, struct tcmulib_cmd *cmd, bool is_read);

#endif
4 changes: 2 additions & 2 deletions rbd.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ static int tcmu_rbd_report_event(struct tcmu_device *dev)
* update, and we do not want one event to overwrite the info.
*/
return tcmu_rbd_service_status_update(dev,
rdev->lock_state == TCMUR_DEV_LOCK_LOCKED ? true : false);
rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKED ? true : false);
}

static int tcmu_rbd_service_register(struct tcmu_device *dev)
Expand Down Expand Up @@ -677,7 +677,7 @@ static int tcmu_rbd_get_lock_state(struct tcmu_device *dev)

ret = tcmu_rbd_has_lock(dev);
if (ret == 1)
return TCMUR_DEV_LOCK_LOCKED;
return TCMUR_DEV_LOCK_WRITE_LOCKED;
else if (ret == 0 || ret == -ESHUTDOWN)
return TCMUR_DEV_LOCK_UNLOCKED;
else
Expand Down
9 changes: 6 additions & 3 deletions tcmur_cmd_handler.c
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,7 @@ int tcmur_handle_writesame(struct tcmu_device *dev, struct tcmur_cmd *tcmur_cmd,
if (tcmu_dev_in_recovery(dev))
return TCMU_STS_BUSY;

ret = alua_check_state(dev, cmd);
ret = alua_check_state(dev, cmd, false);
if (ret)
return ret;

Expand Down Expand Up @@ -1723,7 +1723,7 @@ int tcmur_handle_caw(struct tcmu_device *dev, struct tcmur_cmd *tcmur_cmd,
return TCMU_STS_OK;
}

ret = alua_check_state(dev, cmd);
ret = alua_check_state(dev, cmd, false);
if (ret)
return ret;

Expand Down Expand Up @@ -2118,6 +2118,7 @@ static int tcmur_cmd_handler(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
struct tcmur_device *rdev = tcmu_dev_get_private(dev);
uint8_t *cdb = cmd->cdb;
bool is_read = false;

track_aio_request_start(rdev);

Expand All @@ -2128,10 +2129,12 @@ static int tcmur_cmd_handler(struct tcmu_device *dev, struct tcmulib_cmd *cmd)

/* Don't perform alua implicit transition if command is not supported */
switch(cdb[0]) {
/* Skip to grab the lock for reads */
case READ_6:
case READ_10:
case READ_12:
case READ_16:
is_read = true;
case WRITE_6:
case WRITE_10:
case WRITE_12:
Expand All @@ -2146,7 +2149,7 @@ static int tcmur_cmd_handler(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
case WRITE_SAME:
case WRITE_SAME_16:
case FORMAT_UNIT:
ret = alua_check_state(dev, cmd);
ret = alua_check_state(dev, cmd, is_read);
if (ret)
goto untrack;
break;
Expand Down
60 changes: 41 additions & 19 deletions tcmur_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,7 @@ int __tcmu_reopen_dev(struct tcmu_device *dev, int retries)
*/
tcmur_flush_work(rdev->event_work);

/*
* Force a reacquisition of the lock when we have reopend the
* device, so it can update state. If we are being called from
* the lock code path then do not change state.
*/
pthread_mutex_lock(&rdev->state_lock);
if (rdev->lock_state != TCMUR_DEV_LOCK_LOCKING)
rdev->lock_state = TCMUR_DEV_LOCK_UNLOCKED;

if (rdev->flags & TCMUR_DEV_FLAG_IS_OPEN)
needs_close = true;
rdev->flags &= ~TCMUR_DEV_FLAG_IS_OPEN;
Expand Down Expand Up @@ -281,7 +273,7 @@ void tcmu_notify_lock_lost(struct tcmu_device *dev)
* We could be getting stale IO completions. If we are trying to
* reaquire the lock do not change state.
*/
if (rdev->lock_state != TCMUR_DEV_LOCK_LOCKING) {
if (rdev->lock_state != TCMUR_DEV_LOCK_WRITE_LOCKING) {
__tcmu_notify_lock_lost(dev);
}
pthread_mutex_unlock(&rdev->state_lock);
Expand All @@ -294,7 +286,7 @@ void tcmu_release_dev_lock(struct tcmu_device *dev)
int ret;

pthread_mutex_lock(&rdev->state_lock);
if (rdev->lock_state != TCMUR_DEV_LOCK_LOCKED) {
if (rdev->lock_state != TCMUR_DEV_LOCK_WRITE_LOCKED) {
pthread_mutex_unlock(&rdev->state_lock);
return;
}
Expand Down Expand Up @@ -411,11 +403,21 @@ int tcmu_acquire_dev_lock(struct tcmu_device *dev, uint16_t tag)
goto done;
}

/*
* Since we are here the lock state must be one of:
* for implicit:
* TCMUR_DEV_LOCK_READ_LOCKING
* TCMUR_DEV_LOCK_WRITE_LOCKING
*
* for explicit:
* TCMUR_DEV_LOCK_UNLOCKED
* TCMUR_DEV_LOCK_UNKNOWN
*/

reopen = false;
pthread_mutex_lock(&rdev->state_lock);
if (rdev->lock_lost || !(rdev->flags & TCMUR_DEV_FLAG_IS_OPEN)) {
if (rdev->lock_lost || !(rdev->flags & TCMUR_DEV_FLAG_IS_OPEN))
reopen = true;
}
pthread_mutex_unlock(&rdev->state_lock);

retry:
Expand All @@ -434,6 +436,14 @@ int tcmu_acquire_dev_lock(struct tcmu_device *dev, uint16_t tag)
}
}

pthread_mutex_lock(&rdev->state_lock);
if (rdev->lock_state == TCMUR_DEV_LOCK_READ_LOCKING) {
pthread_mutex_unlock(&rdev->state_lock);
ret = TCMU_STS_OK;
goto done;
}
pthread_mutex_unlock(&rdev->state_lock);

ret = rhandler->lock(dev, tag);
if (ret == TCMU_STS_FENCED) {
if (retries < 1) {
Expand Down Expand Up @@ -467,13 +477,25 @@ int tcmu_acquire_dev_lock(struct tcmu_device *dev, uint16_t tag)

/* TODO: set UA based on bgly's patches */
pthread_mutex_lock(&rdev->state_lock);
if (ret == TCMU_STS_OK)
rdev->lock_state = TCMUR_DEV_LOCK_LOCKED;
else
if (ret != TCMU_STS_OK) {
rdev->lock_state = TCMUR_DEV_LOCK_UNLOCKED;
tcmu_dev_info(dev, "Lock acquisition unsuccessful\n");
} else {
if (rdev->lock_state == TCMUR_DEV_LOCK_READ_LOCKING) {
rdev->lock_state = TCMUR_DEV_LOCK_READ_LOCKED;
tcmu_dev_info(dev, "Read lock acquisition successful\n");
} else if (rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKING) {
rdev->lock_state = TCMUR_DEV_LOCK_WRITE_LOCKED;
tcmu_dev_info(dev, "Write lock acquisition successful\n");
} else {
/*
* For explicit transition it will always acquire the write lock.
*/
rdev->lock_state = TCMUR_DEV_LOCK_WRITE_LOCKED;
tcmu_dev_info(dev, "Write lock acquisition successful\n");
}
}

tcmu_dev_info(dev, "Lock acquisition %s\n",
rdev->lock == TCMUR_DEV_LOCK_LOCKED ? "successful" : "unsuccessful");
tcmu_cfgfs_dev_exec_action(dev, "block_dev", 0);

pthread_mutex_unlock(&rdev->state_lock);
Expand Down Expand Up @@ -501,8 +523,8 @@ void tcmu_update_dev_lock_state(struct tcmu_device *dev)
state = rhandler->get_lock_state(dev);
pthread_mutex_lock(&rdev->state_lock);
check_state:
if (rdev->lock_state == TCMUR_DEV_LOCK_LOCKED &&
state != TCMUR_DEV_LOCK_LOCKED) {
if (rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKED &&
state != TCMUR_DEV_LOCK_WRITE_LOCKED) {
tcmu_dev_dbg(dev, "Updated out of sync lock state.\n");
__tcmu_notify_lock_lost(dev);
}
Expand Down
8 changes: 5 additions & 3 deletions tcmur_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ enum {
};

enum {
TCMUR_DEV_LOCK_UNLOCKED,
TCMUR_DEV_LOCK_LOCKED,
TCMUR_DEV_LOCK_LOCKING,
TCMUR_DEV_LOCK_UNKNOWN,
TCMUR_DEV_LOCK_UNLOCKED,
TCMUR_DEV_LOCK_READ_LOCKING,
TCMUR_DEV_LOCK_READ_LOCKED,
TCMUR_DEV_LOCK_WRITE_LOCKING,
TCMUR_DEV_LOCK_WRITE_LOCKED,
};

struct tcmur_work;
Expand Down

0 comments on commit 05be5c9

Please sign in to comment.