Skip to content

Commit

Permalink
nfscl: Clear out a lot of cruft related to B_DIRECT
Browse files Browse the repository at this point in the history
There is only one place in the unpatched sources where B_DIRECT is
set in the NFS client and this code is never executed. As such, this patch
removes this code that is never executed, since B_DIRECT should never
be set.

During a IETF testing event this week, I saw a crash in ncl_doio_directwrite(),
but this function is only called if B_DIRECT is set.
I cannot explain how ncl_doio_directwrite() got called, but once this patch
was applied to the sources, the crash did not recur. This is not surprising,
since this patch deleted the function.

(cherry picked from commit 03a39a1)
  • Loading branch information
Rick Macklem authored and Rick Macklem committed May 1, 2024
1 parent 19b6aa0 commit 825cb4c
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 230 deletions.
1 change: 0 additions & 1 deletion sys/fs/nfs/nfs_commonport.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ MALLOC_DEFINE(M_NEWNFSCLCLIENT, "NFSCL client", "NFSCL Client");
MALLOC_DEFINE(M_NEWNFSCLLOCKOWNER, "NFSCL lckown", "NFSCL Lock Owner");
MALLOC_DEFINE(M_NEWNFSCLLOCK, "NFSCL lck", "NFSCL Lock");
MALLOC_DEFINE(M_NEWNFSV4NODE, "NEWNFSnode", "NFS vnode");
MALLOC_DEFINE(M_NEWNFSDIRECTIO, "NEWdirectio", "NFS Direct IO buffer");
MALLOC_DEFINE(M_NEWNFSDIROFF, "NFSCL diroff",
"NFS directory offset data");
MALLOC_DEFINE(M_NEWNFSDROLLBACK, "NFSD rollback",
Expand Down
2 changes: 0 additions & 2 deletions sys/fs/nfs/nfsport.h
Original file line number Diff line number Diff line change
Expand Up @@ -938,7 +938,6 @@ MALLOC_DECLARE(M_NEWNFSCLLOCKOWNER);
MALLOC_DECLARE(M_NEWNFSCLLOCK);
MALLOC_DECLARE(M_NEWNFSDIROFF);
MALLOC_DECLARE(M_NEWNFSV4NODE);
MALLOC_DECLARE(M_NEWNFSDIRECTIO);
MALLOC_DECLARE(M_NEWNFSMNT);
MALLOC_DECLARE(M_NEWNFSDROLLBACK);
MALLOC_DECLARE(M_NEWNFSLAYOUT);
Expand All @@ -965,7 +964,6 @@ MALLOC_DECLARE(M_NEWNFSDSESSION);
#define M_NFSCLLOCK M_NEWNFSCLLOCK
#define M_NFSDIROFF M_NEWNFSDIROFF
#define M_NFSV4NODE M_NEWNFSV4NODE
#define M_NFSDIRECTIO M_NEWNFSDIRECTIO
#define M_NFSDROLLBACK M_NEWNFSDROLLBACK
#define M_NFSLAYOUT M_NEWNFSLAYOUT
#define M_NFSFLAYOUT M_NEWNFSFLAYOUT
Expand Down
1 change: 0 additions & 1 deletion sys/fs/nfsclient/nfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ enum nfsiod_state {
* Function prototypes.
*/
int ncl_meta_setsize(struct vnode *, struct thread *, u_quad_t);
void ncl_doio_directwrite(struct buf *);
int ncl_bioread(struct vnode *, struct uio *, int, struct ucred *);
int ncl_biowrite(struct vnode *, struct uio *, int, struct ucred *);
int ncl_vinvalbuf(struct vnode *, int, struct thread *, int);
Expand Down
237 changes: 47 additions & 190 deletions sys/fs/nfsclient/nfs_clbio.c
Original file line number Diff line number Diff line change
Expand Up @@ -767,144 +767,58 @@ static int
nfs_directio_write(struct vnode *vp, struct uio *uiop, struct ucred *cred,
int ioflag)
{
int error;
struct uio uio;
struct iovec iov;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
struct thread *td = uiop->uio_td;
int size;
int wsize;
int error, iomode, must_commit, size, wsize;

KASSERT((ioflag & IO_SYNC) != 0, ("nfs_directio_write: not sync"));
mtx_lock(&nmp->nm_mtx);
wsize = nmp->nm_wsize;
mtx_unlock(&nmp->nm_mtx);
if (ioflag & IO_SYNC) {
int iomode, must_commit;
struct uio uio;
struct iovec iov;
do_sync:
while (uiop->uio_resid > 0) {
size = MIN(uiop->uio_resid, wsize);
size = MIN(uiop->uio_iov->iov_len, size);
iov.iov_base = uiop->uio_iov->iov_base;
iov.iov_len = size;
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
uio.uio_offset = uiop->uio_offset;
uio.uio_resid = size;
uio.uio_segflg = uiop->uio_segflg;
uio.uio_rw = UIO_WRITE;
uio.uio_td = td;
iomode = NFSWRITE_FILESYNC;
/*
* When doing direct I/O we do not care if the
* server's write verifier has changed, but we
* do not want to update the verifier if it has
* changed, since that hides the change from
* writes being done through the buffer cache.
* By passing must_commit in set to two, the code
* in nfsrpc_writerpc() will not update the
* verifier on the mount point.
*/
must_commit = 2;
error = ncl_writerpc(vp, &uio, cred, &iomode,
&must_commit, 0, ioflag);
KASSERT((must_commit == 2),
("ncl_directio_write: Updated write verifier"));
if (error)
return (error);
if (iomode != NFSWRITE_FILESYNC)
printf("nfs_directio_write: Broken server "
"did not reply FILE_SYNC\n");
uiop->uio_offset += size;
uiop->uio_resid -= size;
if (uiop->uio_iov->iov_len <= size) {
uiop->uio_iovcnt--;
uiop->uio_iov++;
} else {
uiop->uio_iov->iov_base =
(char *)uiop->uio_iov->iov_base + size;
uiop->uio_iov->iov_len -= size;
}
}
} else {
struct uio *t_uio;
struct iovec *t_iov;
struct buf *bp;

while (uiop->uio_resid > 0) {
size = MIN(uiop->uio_resid, wsize);
size = MIN(uiop->uio_iov->iov_len, size);
iov.iov_base = uiop->uio_iov->iov_base;
iov.iov_len = size;
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
uio.uio_offset = uiop->uio_offset;
uio.uio_resid = size;
uio.uio_segflg = uiop->uio_segflg;
uio.uio_rw = UIO_WRITE;
uio.uio_td = td;
iomode = NFSWRITE_FILESYNC;
/*
* Break up the write into blocksize chunks and hand these
* over to nfsiod's for write back.
* Unfortunately, this incurs a copy of the data. Since
* the user could modify the buffer before the write is
* initiated.
*
* The obvious optimization here is that one of the 2 copies
* in the async write path can be eliminated by copying the
* data here directly into mbufs and passing the mbuf chain
* down. But that will require a fair amount of re-working
* of the code and can be done if there's enough interest
* in NFS directio access.
* When doing direct I/O we do not care if the
* server's write verifier has changed, but we
* do not want to update the verifier if it has
* changed, since that hides the change from
* writes being done through the buffer cache.
* By passing must_commit in set to two, the code
* in nfsrpc_writerpc() will not update the
* verifier on the mount point.
*/
while (uiop->uio_resid > 0) {
size = MIN(uiop->uio_resid, wsize);
size = MIN(uiop->uio_iov->iov_len, size);
bp = uma_zalloc(ncl_pbuf_zone, M_WAITOK);
t_uio = malloc(sizeof(struct uio), M_NFSDIRECTIO, M_WAITOK);
t_iov = malloc(sizeof(struct iovec), M_NFSDIRECTIO, M_WAITOK);
t_iov->iov_base = malloc(size, M_NFSDIRECTIO, M_WAITOK);
t_iov->iov_len = size;
t_uio->uio_iov = t_iov;
t_uio->uio_iovcnt = 1;
t_uio->uio_offset = uiop->uio_offset;
t_uio->uio_resid = size;
t_uio->uio_segflg = UIO_SYSSPACE;
t_uio->uio_rw = UIO_WRITE;
t_uio->uio_td = td;
KASSERT(uiop->uio_segflg == UIO_USERSPACE ||
uiop->uio_segflg == UIO_SYSSPACE,
("nfs_directio_write: Bad uio_segflg"));
if (uiop->uio_segflg == UIO_USERSPACE) {
error = copyin(uiop->uio_iov->iov_base,
t_iov->iov_base, size);
if (error != 0)
goto err_free;
} else
/*
* UIO_SYSSPACE may never happen, but handle
* it just in case it does.
*/
bcopy(uiop->uio_iov->iov_base, t_iov->iov_base,
size);
bp->b_flags |= B_DIRECT;
bp->b_iocmd = BIO_WRITE;
if (cred != NOCRED) {
crhold(cred);
bp->b_wcred = cred;
} else
bp->b_wcred = NOCRED;
bp->b_caller1 = (void *)t_uio;
bp->b_vp = vp;
error = ncl_asyncio(nmp, bp, NOCRED, td);
err_free:
if (error) {
free(t_iov->iov_base, M_NFSDIRECTIO);
free(t_iov, M_NFSDIRECTIO);
free(t_uio, M_NFSDIRECTIO);
bp->b_vp = NULL;
uma_zfree(ncl_pbuf_zone, bp);
if (error == EINTR)
return (error);
goto do_sync;
}
uiop->uio_offset += size;
uiop->uio_resid -= size;
if (uiop->uio_iov->iov_len <= size) {
uiop->uio_iovcnt--;
uiop->uio_iov++;
} else {
uiop->uio_iov->iov_base =
(char *)uiop->uio_iov->iov_base + size;
uiop->uio_iov->iov_len -= size;
}
must_commit = 2;
error = ncl_writerpc(vp, &uio, cred, &iomode,
&must_commit, 0, ioflag);
KASSERT(must_commit == 2,
("ncl_directio_write: Updated write verifier"));
if (error != 0)
return (error);
if (iomode != NFSWRITE_FILESYNC)
printf("nfs_directio_write: Broken server "
"did not reply FILE_SYNC\n");
uiop->uio_offset += size;
uiop->uio_resid -= size;
if (uiop->uio_iov->iov_len <= size) {
uiop->uio_iovcnt--;
uiop->uio_iov++;
} else {
uiop->uio_iov->iov_base =
(char *)uiop->uio_iov->iov_base + size;
uiop->uio_iov->iov_len -= size;
}
}
return (0);
Expand Down Expand Up @@ -1470,7 +1384,7 @@ ncl_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg)
nanouptime(&ts);
NFSLOCKNODE(np);
}
if (np->n_directio_asyncwr == 0 && (np->n_flag & NMODIFIED) != 0) {
if ((np->n_flag & NMODIFIED) != 0) {
np->n_localmodtime = ts;
np->n_flag &= ~NMODIFIED;
}
Expand Down Expand Up @@ -1615,12 +1529,8 @@ ncl_asyncio(struct nfsmount *nmp, struct buf *bp, struct ucred *cred, struct thr
BUF_KERNPROC(bp);
TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
nmp->nm_bufqlen++;
if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) {
NFSLOCKNODE(VTONFS(bp->b_vp));
VTONFS(bp->b_vp)->n_flag |= NMODIFIED;
VTONFS(bp->b_vp)->n_directio_asyncwr++;
NFSUNLOCKNODE(VTONFS(bp->b_vp));
}
KASSERT((bp->b_flags & B_DIRECT) == 0,
("ncl_asyncio: B_DIRECT set"));
NFSUNLOCKIOD();
return (0);
}
Expand All @@ -1635,59 +1545,6 @@ ncl_asyncio(struct nfsmount *nmp, struct buf *bp, struct ucred *cred, struct thr
return (EIO);
}

void
ncl_doio_directwrite(struct buf *bp)
{
int iomode, must_commit;
struct uio *uiop = (struct uio *)bp->b_caller1;
char *iov_base = uiop->uio_iov->iov_base;

iomode = NFSWRITE_FILESYNC;
uiop->uio_td = NULL; /* NULL since we're in nfsiod */
/*
* When doing direct I/O we do not care if the
* server's write verifier has changed, but we
* do not want to update the verifier if it has
* changed, since that hides the change from
* writes being done through the buffer cache.
* By passing must_commit in set to two, the code
* in nfsrpc_writerpc() will not update the
* verifier on the mount point.
*/
must_commit = 2;
ncl_writerpc(bp->b_vp, uiop, bp->b_wcred, &iomode, &must_commit, 0, 0);
KASSERT((must_commit == 2), ("ncl_doio_directwrite: Updated write"
" verifier"));
if (iomode != NFSWRITE_FILESYNC)
printf("ncl_doio_directwrite: Broken server "
"did not reply FILE_SYNC\n");
free(iov_base, M_NFSDIRECTIO);
free(uiop->uio_iov, M_NFSDIRECTIO);
free(uiop, M_NFSDIRECTIO);
if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) {
struct nfsnode *np = VTONFS(bp->b_vp);
NFSLOCKNODE(np);
if (NFSHASPNFS(VFSTONFS(bp->b_vp->v_mount))) {
/*
* Invalidate the attribute cache, since writes to a DS
* won't update the size attribute.
*/
np->n_attrstamp = 0;
}
np->n_directio_asyncwr--;
if (np->n_directio_asyncwr == 0) {
np->n_flag &= ~NMODIFIED;
if ((np->n_flag & NFSYNCWAIT)) {
np->n_flag &= ~NFSYNCWAIT;
wakeup((caddr_t)&np->n_directio_asyncwr);
}
}
NFSUNLOCKNODE(np);
}
bp->b_vp = NULL;
uma_zfree(ncl_pbuf_zone, bp);
}

/*
* Do an I/O operation to/from a cache block. This may be called
* synchronously or from an nfsiod.
Expand Down
19 changes: 8 additions & 11 deletions sys/fs/nfsclient/nfs_clnfsiod.c
Original file line number Diff line number Diff line change
Expand Up @@ -292,17 +292,14 @@ nfssvc_iod(void *instance)
wakeup(&nmp->nm_bufq);
}
NFSUNLOCKIOD();
if (bp->b_flags & B_DIRECT) {
KASSERT((bp->b_iocmd == BIO_WRITE), ("nfscvs_iod: BIO_WRITE not set"));
(void)ncl_doio_directwrite(bp);
} else {
if (bp->b_iocmd == BIO_READ)
(void) ncl_doio(bp->b_vp, bp, bp->b_rcred,
NULL, 0);
else
(void) ncl_doio(bp->b_vp, bp, bp->b_wcred,
NULL, 0);
}
KASSERT((bp->b_flags & B_DIRECT) == 0,
("nfssvc_iod: B_DIRECT set"));
if (bp->b_iocmd == BIO_READ)
(void) ncl_doio(bp->b_vp, bp, bp->b_rcred,
NULL, 0);
else
(void) ncl_doio(bp->b_vp, bp, bp->b_wcred,
NULL, 0);
NFSLOCKIOD();
/*
* Make sure the nmp hasn't been dismounted as soon as
Expand Down
24 changes: 2 additions & 22 deletions sys/fs/nfsclient/nfs_clvnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -960,10 +960,6 @@ nfs_close(struct vop_close_args *ap)
error = nfscl_maperr(ap->a_td, error, (uid_t)0,
(gid_t)0);
}
if (newnfs_directio_enable)
KASSERT((np->n_directio_asyncwr == 0),
("nfs_close: dirty unflushed (%d) directio buffers\n",
np->n_directio_asyncwr));
if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
NFSLOCKNODE(np);
KASSERT((np->n_directio_opens > 0),
Expand Down Expand Up @@ -3202,21 +3198,6 @@ ncl_flush(struct vnode *vp, int waitfor, struct thread *td,
* Wait for all the async IO requests to drain
*/
BO_UNLOCK(bo);
NFSLOCKNODE(np);
while (np->n_directio_asyncwr > 0) {
np->n_flag |= NFSYNCWAIT;
error = newnfs_msleep(td, &np->n_directio_asyncwr,
&np->n_mtx, slpflag | (PRIBIO + 1),
"nfsfsync", 0);
if (error) {
if (newnfs_sigintr(nmp, td)) {
NFSUNLOCKNODE(np);
error = EINTR;
goto done;
}
}
}
NFSUNLOCKNODE(np);
} else
BO_UNLOCK(bo);
if (NFSHASPNFS(nmp)) {
Expand All @@ -3234,15 +3215,14 @@ ncl_flush(struct vnode *vp, int waitfor, struct thread *td,
np->n_flag &= ~NWRITEERR;
}
if (commit && bo->bo_dirty.bv_cnt == 0 &&
bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
bo->bo_numoutput == 0)
np->n_flag &= ~NMODIFIED;
NFSUNLOCKNODE(np);
done:
if (bvec != NULL && bvec != bvec_on_stack)
free(bvec, M_TEMP);
if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
(bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
np->n_directio_asyncwr != 0)) {
(bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0)) {
if (trycnt++ < 5) {
/* try, try again... */
passone = 1;
Expand Down
3 changes: 0 additions & 3 deletions sys/fs/nfsclient/nfsnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,6 @@ struct nfsnode {
short n_fhsize; /* size in bytes, of fh */
u_int32_t n_flag; /* Flag for locking.. */
int n_directio_opens;
int n_directio_asyncwr;
u_int64_t n_change; /* old Change attribute */
struct nfsv4node *n_v4; /* extra V4 stuff */
struct ucred *n_writecred; /* Cred. for putpages */
Expand All @@ -142,8 +141,6 @@ struct nfsnode {
* Flags for n_flag
*/
#define NDIRCOOKIELK 0x00000001 /* Lock to serialize access to directory cookies */
#define NFSYNCWAIT 0x00000002 /* fsync waiting for all directio async
writes to drain */
#define NMODIFIED 0x00000004 /* Might have a modified buffer in bio */
#define NWRITEERR 0x00000008 /* Flag write errors so close will know */
#define NCREATED 0x00000010 /* Opened by nfs_create() */
Expand Down

0 comments on commit 825cb4c

Please sign in to comment.