From 7cbd88ac812478b31b3ff50ad4fa9e4d376a21fd Mon Sep 17 00:00:00 2001 From: Antonio SJ Musumeci Date: Thu, 30 May 2019 18:59:18 -0400 Subject: [PATCH] allow setting of 'max_pages' (via 'fuse_msg_size') Linux 4.20 and above allow setting the number of pages per FUSE message upto 256 (4K * 256 = 1MiB). This can greatly increase read and write speeds depending on the workload. --- README.md | 115 +++++++++++----------- libfuse/Makefile | 4 +- libfuse/include/fuse_common.h | 14 ++- libfuse/lib/fuse.c | 9 +- libfuse/lib/fuse_kern_chan.c | 27 +++--- libfuse/lib/fuse_lowlevel.c | 26 ++--- libfuse/lib/helper.c | 143 +++++++++++++++------------- man/mergerfs.1 | 173 ++++++++++++++++++++-------------- src/config.cpp | 1 + src/config.hpp | 1 + src/fuse_getxattr.cpp | 14 +++ src/fuse_init.cpp | 17 ++++ src/fuse_listxattr.cpp | 1 + src/option_parser.cpp | 26 +++++ 14 files changed, 353 insertions(+), 218 deletions(-) diff --git a/README.md b/README.md index 4784fff40..69c8c1672 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ % mergerfs(1) mergerfs user manual % Antonio SJ Musumeci -% 2019-05-23 +% 2019-06-03 # NAME @@ -67,33 +67,34 @@ mergerfs does **not** support the copy-on-write (CoW) behavior found in **aufs** ### mount options * **allow_other**: A libfuse option which allows users besides the one which ran mergerfs to see the filesystem. This is required for most use-cases. -* **minfreespace=value**: The minimum space value used for creation policies. Understands 'K', 'M', and 'G' to represent kilobyte, megabyte, and gigabyte respectively. (default: 4G) -* **moveonenospc=true|false**: When enabled if a **write** fails with **ENOSPC** or **EDQUOT** a scan of all drives will be done looking for the drive with the most free space which is at least the size of the file plus the amount which failed to write. An attempt to move the file to that drive will occur (keeping all metadata possible) and if successful the original is unlinked and the write retried. 
(default: false) +* **minfreespace=SIZE**: The minimum space value used for creation policies. Understands 'K', 'M', and 'G' to represent kilobyte, megabyte, and gigabyte respectively. (default: 4G) +* **moveonenospc=BOOL**: When enabled if a **write** fails with **ENOSPC** or **EDQUOT** a scan of all drives will be done looking for the drive with the most free space which is at least the size of the file plus the amount which failed to write. An attempt to move the file to that drive will occur (keeping all metadata possible) and if successful the original is unlinked and the write retried. (default: false) * **use_ino**: Causes mergerfs to supply file/directory inodes rather than libfuse. While not a default it is recommended it be enabled so that linked files share the same inode value. -* **dropcacheonclose=true|false**: When a file is requested to be closed call `posix_fadvise` on it first to instruct the kernel that we no longer need the data and it can drop its cache. Recommended when **cache.files=partial|full|auto-full** to limit double caching. (default: false) -* **symlinkify=true|false**: When enabled and a file is not writable and its mtime or ctime is older than **symlinkify_timeout** files will be reported as symlinks to the original files. Please read more below before using. (default: false) -* **symlinkify_timeout=value**: Time to wait, in seconds, to activate the **symlinkify** behavior. (default: 3600) -* **nullrw=true|false**: Turns reads and writes into no-ops. The request will succeed but do nothing. Useful for benchmarking mergerfs. (default: false) -* **ignorepponrename=true|false**: Ignore path preserving on rename. Typically rename and link act differently depending on the policy of `create` (read below). Enabling this will cause rename and link to always use the non-path preserving behavior. This means files, when renamed or linked, will stay on the same drive. 
(default: false) -* **security_capability=true|false**: If false return ENOATTR when xattr security.capability is queried. (default: true) +* **dropcacheonclose=BOOL**: When a file is requested to be closed call `posix_fadvise` on it first to instruct the kernel that we no longer need the data and it can drop its cache. Recommended when **cache.files=partial|full|auto-full** to limit double caching. (default: false) +* **symlinkify=BOOL**: When enabled and a file is not writable and its mtime or ctime is older than **symlinkify_timeout** files will be reported as symlinks to the original files. Please read more below before using. (default: false) +* **symlinkify_timeout=INT**: Time to wait, in seconds, to activate the **symlinkify** behavior. (default: 3600) +* **nullrw=BOOL**: Turns reads and writes into no-ops. The request will succeed but do nothing. Useful for benchmarking mergerfs. (default: false) +* **ignorepponrename=BOOL**: Ignore path preserving on rename. Typically rename and link act differently depending on the policy of `create` (read below). Enabling this will cause rename and link to always use the non-path preserving behavior. This means files, when renamed or linked, will stay on the same drive. (default: false) +* **security_capability=BOOL**: If false return ENOATTR when xattr security.capability is queried. (default: true) * **xattr=passthrough|noattr|nosys**: Runtime control of xattrs. Default is to passthrough xattr requests. 'noattr' will short circuit as if nothing exists. 'nosys' will respond with ENOSYS as if xattrs are not supported or disabled. (default: passthrough) -* **link_cow=true|false**: When enabled if a regular file is opened which has a link count > 1 it will copy the file to a temporary file and rename over the original. Breaking the link and providing a basic copy-on-write function similar to cow-shell. 
(default: false) +* **link_cow=BOOL**: When enabled if a regular file is opened which has a link count > 1 it will copy the file to a temporary file and rename over the original. Breaking the link and providing a basic copy-on-write function similar to cow-shell. (default: false) * **statfs=base|full**: Controls how statfs works. 'base' means it will always use all branches in statfs calculations. 'full' is in effect path preserving and only includes drives where the path exists. (default: base) * **statfs_ignore=none|ro|nc**: 'ro' will cause statfs calculations to ignore available space for branches mounted or tagged as 'read-only' or 'no create'. 'nc' will ignore available space for branches tagged as 'no create'. (default: none) -* **posix_acl=true|false:** Enable POSIX ACL support (if supported by kernel and underlying filesystem). (default: false) -* **async_read=true|false:** Perform reads asynchronously. If disabled or unavailable the kernel will ensure there is at most one pending read request per file handle and will attempt to order requests by offset. (default: true) -* **threads=num**: Number of threads to use in multithreaded mode. When set to zero it will attempt to discover and use the number of logical cores. If the lookup fails it will fall back to using 4. If the thread count is set negative it will look up the number of cores then divide by the absolute value. ie. threads=-2 on an 8 core machine will result in 8 / 2 = 4 threads. There will always be at least 1 thread. NOTE: higher number of threads increases parallelism but usually decreases throughput. (default: 0) -* **fsname=name**: Sets the name of the filesystem as seen in **mount**, **df**, etc. Defaults to a list of the source paths concatenated together with the longest common prefix removed. -* **func.<func>=<policy>**: Sets the specific FUSE function's policy. See below for the list of value types. 
Example: **func.getattr=newest** -* **category.<category>=<policy>**: Sets policy of all FUSE functions in the provided category. Example: **category.create=mfs** -* **cache.open=<int>**: 'open' policy cache timeout in seconds. (default: 0) -* **cache.statfs=<int>**: 'statfs' cache timeout in seconds. (default: 0) -* **cache.attr=<int>**: File attribute cache timeout in seconds. (default: 1) -* **cache.entry=<int>**: File name lookup cache timeout in seconds. (default: 1) -* **cache.negative_entry=<int>**: Negative file name lookup cache timeout in seconds. (default: 0) +* **posix_acl=BOOL**: Enable POSIX ACL support (if supported by kernel and underlying filesystem). (default: false) +* **async_read=BOOL**: Perform reads asynchronously. If disabled or unavailable the kernel will ensure there is at most one pending read request per file handle and will attempt to order requests by offset. (default: true) +* **fuse_msg_size=INT**: Set the max number of pages per FUSE message. Only available on Linux >= 4.20 and ignored otherwise. (min: 1; max: 256; default: 256) +* **threads=INT**: Number of threads to use in multithreaded mode. When set to zero it will attempt to discover and use the number of logical cores. If the lookup fails it will fall back to using 4. If the thread count is set negative it will look up the number of cores then divide by the absolute value. ie. threads=-2 on an 8 core machine will result in 8 / 2 = 4 threads. There will always be at least 1 thread. NOTE: higher number of threads increases parallelism but usually decreases throughput. (default: 0) +* **fsname=STR**: Sets the name of the filesystem as seen in **mount**, **df**, etc. Defaults to a list of the source paths concatenated together with the longest common prefix removed. +* **func.FUNC=POLICY**: Sets the specific FUSE function's policy. See below for the list of value types. 
Example: **func.getattr=newest** +* **category.CATEGORY=POLICY**: Sets policy of all FUSE functions in the provided category. Example: **category.create=mfs** +* **cache.open=INT**: 'open' policy cache timeout in seconds. (default: 0) +* **cache.statfs=INT**: 'statfs' cache timeout in seconds. (default: 0) +* **cache.attr=INT**: File attribute cache timeout in seconds. (default: 1) +* **cache.entry=INT**: File name lookup cache timeout in seconds. (default: 1) +* **cache.negative_entry=INT**: Negative file name lookup cache timeout in seconds. (default: 0) * **cache.files=libfuse|off|partial|full|auto-full**: File page caching mode (default: libfuse) -* **cache.symlinks=<bool>**: Cache symlinks (if supported by kernel) (default: false) -* **cache.readdir=<bool>**: Cache readdir (if supported by kernel) (default: false) +* **cache.symlinks=BOOL**: Cache symlinks (if supported by kernel) (default: false) +* **cache.readdir=BOOL**: Cache readdir (if supported by kernel) (default: false) * **direct_io**: deprecated - Bypass page cache. Use `cache.files=off` instead. (default: false) * **kernel_cache**: deprecated - Do not invalidate data cache on file open. Use `cache.files=full` instead. (default: false) * **auto_cache**: deprecated - Invalidate data cache if file mtime or size change. Use `cache.files=auto-full` instead. (default: false) @@ -104,6 +105,17 @@ mergerfs does **not** support the copy-on-write (CoW) behavior found in **aufs** **NOTE:** Options are evaluated in the order listed so if the options are **func.rmdir=rand,category.action=ff** the **action** category setting will override the **rmdir** setting. 
+#### Value Types + +* BOOL = 'true' | 'false' +* INT = [0,MAX_INT] +* SIZE = 'NNM'; NN = INT, M = 'K' | 'M' | 'G' | 'T' +* STR = string +* FUNC = FUSE function +* CATEGORY = FUSE function category +* POLICY = mergerfs function policy + + ### branches The 'branches' (formerly 'srcmounts') argument is a colon (':') delimited list of paths to be pooled together. It does not matter if the paths are on the same or different drives nor does it matter the filesystem. Used and available space will not be duplicated for paths on the same device and any features which aren't supported by the underlying filesystem (such as file attributes or extended attributes) will return the appropriate errors. @@ -130,6 +142,15 @@ To have the pool mounted at boot or otherwise accessable from related tools use **NOTE:** for mounting via **fstab** to work you must have **mount.fuse** installed. For Ubuntu/Debian it is included in the **fuse** package. +### fuse_msg_size + +FUSE applications communicate with the kernel over a special character device: `/dev/fuse`. A large portion of the overhead associated with FUSE is the cost of going back and forth from user space and kernel space over that device. Generally speaking the fewer trips needed the better the performance will be. Reducing the number of trips can be done a number of ways. Kernel level caching and increasing message sizes being two significant ones. When it comes to reads and writes if the message size is doubled the number of trips is approximately halved. + +In Linux 4.20 a new feature was added allowing the negotiation of the max message size. Since the size is in multiples of [pages](https://en.wikipedia.org/wiki/Page_(computer_memory)) the feature is called `max_pages`. There is a maximum `max_pages` value of 256 (1MiB) and minimum of 1 (4KiB). The default used by Linux >=4.20, and hardcoded value used before 4.20, is 32 (128KiB).
In mergerfs it is referred to as `fuse_msg_size` to make it clear what it impacts and provide some abstraction. + +Since there should be no downsides to increasing `fuse_msg_size` / `max_pages`, outside a minor bump in RAM usage due to larger message buffers, mergerfs defaults the value to 256. On kernels before 4.20 the value has no effect. The reason the value is configurable is to enable experimentation and benchmarking. See the `nullrw` section for benchmarking examples. + + ### symlinkify Due to the levels of indirection introduced by mergerfs and the underlying technology FUSE there can be varying levels of performance degredation. This feature will turn non-directories which are not writable into symlinks to the original file found by the `readlink` policy after the mtime and ctime are older than the timeout. @@ -147,22 +168,22 @@ By enabling `nullrw` mergerfs will work as it always does **except** that all re Example: ``` -$ dd if=/dev/zero of=/path/to/mergerfs/mount/benchmark ibs=1M obs=512 count=1024 conv=fdatasync +$ dd if=/dev/zero of=/path/to/mergerfs/mount/benchmark ibs=1M obs=512 count=1024 iflag=dsync,nocache oflag=dsync,nocache conv=fdatasync status=progress 1024+0 records in 2097152+0 records out 1073741824 bytes (1.1 GB, 1.0 GiB) copied, 15.4067 s, 69.7 MB/s -$ dd if=/dev/zero of=/path/to/mergerfs/mount/benchmark ibs=1M obs=1M count=1024 conv=fdatasync +$ dd if=/dev/zero of=/path/to/mergerfs/mount/benchmark ibs=1M obs=1M count=1024 iflag=dsync,nocache oflag=dsync,nocache conv=fdatasync status=progress 1024+0 records in 1024+0 records out 1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.219585 s, 4.9 GB/s -$ dd if=/path/to/mergerfs/mount/benchmark of=/dev/null bs=512 count=102400 conv=fdatasync +$ dd if=/path/to/mergerfs/mount/benchmark of=/dev/null bs=512 count=102400 iflag=dsync,nocache oflag=dsync,nocache conv=fdatasync status=progress 102400+0 records in 102400+0 records out 52428800 bytes (52 MB, 50 MiB) copied, 0.757991 s, 69.2 MB/s -$ dd 
if=/path/to/mergerfs/mount/benchmark of=/dev/null bs=1M count=1024 conv=fdatasync +$ dd if=/path/to/mergerfs/mount/benchmark of=/dev/null bs=1M count=1024 iflag=dsync,nocache oflag=dsync,nocache conv=fdatasync status=progress 1024+0 records in 1024+0 records out 1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.18405 s, 5.8 GB/s @@ -388,7 +409,12 @@ Any changes made at runtime are **not** persisted. If you wish for values to per ##### Keys ##### -Use `xattr -l /mountpoint/.mergerfs` to see all supported keys. Some are informational and therefore read-only. +Use `xattr -l /mountpoint/.mergerfs` to see all supported keys. Some are informational and therefore read-only. `setxattr` will return EINVAL on read-only keys. + + +##### Values ##### + +Same as the command line. ###### user.mergerfs.branches ###### @@ -411,32 +437,11 @@ Used to query or modify the list of branches. When modifying there are several s The `=NC`, `=RO`, `=RW` syntax works just as on the command line. -###### minfreespace ###### - -Input: interger with an optional multiplier suffix. **K**, **M**, or **G**. - -Output: value in bytes - - -###### moveonenospc ###### - -Input: **true** and **false** - -Ouput: **true** or **false** - - -###### categories / funcs ###### - -Input: short policy string as described elsewhere in this document - -Output: the policy string except for categories where its funcs have multiple types. In that case it will be a comma separated list - - ##### Example ##### ``` [trapexit:/mnt/mergerfs] $ xattr -l .mergerfs -user.mergerfs.branches: /mnt/a:/mnt/b +user.mergerfs.branches: /mnt/a=RW:/mnt/b=RW user.mergerfs.minfreespace: 4294967295 user.mergerfs.moveonenospc: false ... @@ -466,10 +471,10 @@ newest While they won't show up when using [listxattr](http://linux.die.net/man/2/listxattr) **mergerfs** offers a number of special xattrs to query information about the files served. 
To access the values you will need to issue a [getxattr](http://linux.die.net/man/2/getxattr) for one of the following: -* **user.mergerfs.basepath:** the base mount point for the file given the current getattr policy -* **user.mergerfs.relpath:** the relative path of the file from the perspective of the mount point -* **user.mergerfs.fullpath:** the full path of the original file given the getattr policy -* **user.mergerfs.allpaths:** a NUL ('\0') separated list of full paths to all files found +* **user.mergerfs.basepath**: the base mount point for the file given the current getattr policy +* **user.mergerfs.relpath**: the relative path of the file from the perspective of the mount point +* **user.mergerfs.fullpath**: the full path of the original file given the getattr policy +* **user.mergerfs.allpaths**: a NUL ('\0') separated list of full paths to all files found ``` [trapexit:/mnt/mergerfs] $ ls @@ -561,7 +566,7 @@ As of version 4.20 Linux supports readdir caching. This can have a significant i #### writeback caching -writeback caching is a technique for improving write speeds by batching writes at a faster device and then bulk writing to the slower device. With FUSE the kernel will wait for a number of writes to be made and then send it to the filesystem as one request. mergerfs currently uses a slightly modified and vendored libfuse 2.9.7 which does not support writeback caching. However, a prototype port to libfuse 3.x has been made and the writeback cache appears to work as expected (though performance improvements greatly depend on the way the client app writes data). Once the port is complete and thoroughly tested writeback caching will be available. +writeback caching is a technique for improving write speeds by batching writes at a faster device and then bulk writing to the slower device. With FUSE the kernel will wait for a number of writes to be made and then send it to the filesystem as one request. 
mergerfs currently uses a modified and vendored libfuse 2.9.7 which does not support writeback caching. Adding said feature should not be difficult but benchmarking needs to be done to see what effect it will have. #### tiered caching diff --git a/libfuse/Makefile b/libfuse/Makefile index 80b2160bc..15f771461 100644 --- a/libfuse/Makefile +++ b/libfuse/Makefile @@ -1,5 +1,5 @@ VERSION = "2.9.7-mergerfs_2.28.0" -OPT = -O2 +OPTS = -O2 ifeq ($(DEBUG),1) DEBUG_FLAGS := -g @@ -28,7 +28,7 @@ OBJS = $(SRC:lib/%.c=build/%.o) DEPS = $(SRC:lib/%.c=build/%.d) CFLAGS += \ - $(OPT) \ + $(OPTS) \ $(DEBUG_FLAGS) \ -Wall \ -pipe \ diff --git a/libfuse/include/fuse_common.h b/libfuse/include/fuse_common.h index 6f1585f69..b53e8b50c 100644 --- a/libfuse/include/fuse_common.h +++ b/libfuse/include/fuse_common.h @@ -33,6 +33,9 @@ #error Please add -D_FILE_OFFSET_BITS=64 to your compile flags! #endif +#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 +#define FUSE_MAX_MAX_PAGES 256 + #ifdef __cplusplus extern "C" { #endif @@ -121,6 +124,7 @@ fuse_file_info #define FUSE_CAP_PARALLEL_DIROPS (1 << 18) #define FUSE_CAP_POSIX_ACL (1 << 19) #define FUSE_CAP_CACHE_SYMLINKS (1 << 20) +#define FUSE_CAP_MAX_PAGES (1 << 21) /** @@ -188,10 +192,15 @@ struct fuse_conn_info { */ unsigned congestion_threshold; + /** + * Max pages + */ + uint16_t max_pages; + /** * For future use. 
*/ - unsigned reserved[23]; + unsigned reserved[22]; }; struct fuse_session; @@ -208,7 +217,8 @@ struct fuse_pollhandle; * @param args argument vector * @return the communication channel on success, NULL on failure */ -struct fuse_chan *fuse_mount(const char *mountpoint, struct fuse_args *args); +struct fuse_chan *fuse_mount(const char *mountpoint, + struct fuse_args *args); /** * Umount a FUSE mountpoint diff --git a/libfuse/lib/fuse.c b/libfuse/lib/fuse.c index d9c46eff4..77e437a7e 100644 --- a/libfuse/lib/fuse.c +++ b/libfuse/lib/fuse.c @@ -4775,9 +4775,11 @@ void fuse_destroy(struct fuse *f) fuse_delete_context_key(); } -static struct fuse *fuse_new_common_compat25(int fd, struct fuse_args *args, - const struct fuse_operations *op, - size_t op_size, int compat) +static +struct fuse * +fuse_new_common_compat25(int fd, struct fuse_args *args, + const struct fuse_operations *op, + size_t op_size, int compat) { struct fuse *f = NULL; struct fuse_chan *ch = fuse_kern_chan_new(fd); @@ -4805,6 +4807,7 @@ static struct fuse *fuse_new_common_compat(int fd, const char *opts, fuse_opt_free_args(&args); return NULL; } + f = fuse_new_common_compat25(fd, &args, op, op_size, compat); fuse_opt_free_args(&args); diff --git a/libfuse/lib/fuse_kern_chan.c b/libfuse/lib/fuse_kern_chan.c index 4a9beb8f8..e8448a5f6 100644 --- a/libfuse/lib/fuse_kern_chan.c +++ b/libfuse/lib/fuse_kern_chan.c @@ -83,16 +83,21 @@ static void fuse_kern_chan_destroy(struct fuse_chan *ch) close(fd); } -#define MIN_BUFSIZE 0x21000 - -struct fuse_chan *fuse_kern_chan_new(int fd) +struct fuse_chan * +fuse_kern_chan_new(int fd_) { - struct fuse_chan_ops op = { - .receive = fuse_kern_chan_receive, - .send = fuse_kern_chan_send, - .destroy = fuse_kern_chan_destroy, - }; - size_t bufsize = getpagesize() + 0x1000; - bufsize = bufsize < MIN_BUFSIZE ? 
MIN_BUFSIZE : bufsize; - return fuse_chan_new(&op, fd, bufsize, NULL); + long pagesize; + size_t bufsize; + struct fuse_chan_ops op = + { + .receive = fuse_kern_chan_receive, + .send = fuse_kern_chan_send, + .destroy = fuse_kern_chan_destroy, + }; + + pagesize = sysconf(_SC_PAGESIZE); + + bufsize = ((FUSE_MAX_MAX_PAGES * pagesize) + 0x1000); + + return fuse_chan_new(&op, fd_, bufsize, NULL); } diff --git a/libfuse/lib/fuse_lowlevel.c b/libfuse/lib/fuse_lowlevel.c index 19ba9836e..01f35b450 100644 --- a/libfuse/lib/fuse_lowlevel.c +++ b/libfuse/lib/fuse_lowlevel.c @@ -1749,8 +1749,10 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) f->conn.want = 0; memset(&outarg, 0, sizeof(outarg)); - outarg.major = FUSE_KERNEL_VERSION; - outarg.minor = FUSE_KERNEL_MINOR_VERSION; + + outarg.major = FUSE_KERNEL_VERSION; + outarg.minor = FUSE_KERNEL_MINOR_VERSION; + outarg.max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; if (arg->major < 7) { fprintf(stderr, "fuse: unsupported protocol version: %u.%u\n", @@ -1790,6 +1792,8 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) f->conn.capable |= FUSE_CAP_ASYNC_DIO; if (arg->flags & FUSE_PARALLEL_DIROPS) f->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; + if (arg->flags & FUSE_MAX_PAGES) + f->conn.capable |= FUSE_CAP_MAX_PAGES; } else { f->conn.want &= ~FUSE_CAP_ASYNC_READ; f->conn.max_readahead = 0; @@ -1812,14 +1816,10 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->conn.proto_minor >= 18) f->conn.capable |= FUSE_CAP_IOCTL_DIR; - if (f->atomic_o_trunc) - f->conn.want |= FUSE_CAP_ATOMIC_O_TRUNC; if (f->op.getlk && f->op.setlk && !f->no_remote_posix_lock) f->conn.want |= FUSE_CAP_POSIX_LOCKS; if (f->op.flock && !f->no_remote_flock) f->conn.want |= FUSE_CAP_FLOCK_LOCKS; - if (f->big_writes) - f->conn.want |= FUSE_CAP_BIG_WRITES; if (bufsize < FUSE_MIN_READ_BUFFER) { fprintf(stderr, "fuse: warning: buffer size too small: %zu\n", @@ -1842,6 +1842,12 @@ 
static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (f->no_splice_move) f->conn.want &= ~FUSE_CAP_SPLICE_MOVE; + if ((arg->flags & FUSE_MAX_PAGES) && (f->conn.want & FUSE_CAP_MAX_PAGES)) + { + outarg.flags |= FUSE_MAX_PAGES; + outarg.max_pages = f->conn.max_pages; + } + if (f->conn.want & FUSE_CAP_ASYNC_READ) outarg.flags |= FUSE_ASYNC_READ; if (f->conn.want & FUSE_CAP_POSIX_LOCKS) @@ -1890,6 +1896,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) outarg.max_background); fprintf(stderr, " congestion_threshold=%i\n", outarg.congestion_threshold); + fprintf(stderr, " max_pages=%d\n",outarg.max_pages); } size_t outargsize; @@ -2520,17 +2527,14 @@ static const struct fuse_opt fuse_ll_opts[] = { { "debug", offsetof(struct fuse_ll, debug), 1 }, { "-d", offsetof(struct fuse_ll, debug), 1 }, { "allow_root", offsetof(struct fuse_ll, allow_root), 1 }, - { "max_write=%u", offsetof(struct fuse_ll, conn.max_write), 0 }, { "max_readahead=%u", offsetof(struct fuse_ll, conn.max_readahead), 0 }, { "max_background=%u", offsetof(struct fuse_ll, conn.max_background), 0 }, { "congestion_threshold=%u", offsetof(struct fuse_ll, conn.congestion_threshold), 0 }, - { "atomic_o_trunc", offsetof(struct fuse_ll, atomic_o_trunc), 1}, { "no_remote_lock", offsetof(struct fuse_ll, no_remote_posix_lock), 1}, { "no_remote_lock", offsetof(struct fuse_ll, no_remote_flock), 1}, { "no_remote_flock", offsetof(struct fuse_ll, no_remote_flock), 1}, { "no_remote_posix_lock", offsetof(struct fuse_ll, no_remote_posix_lock), 1}, - { "big_writes", offsetof(struct fuse_ll, big_writes), 1}, { "splice_write", offsetof(struct fuse_ll, splice_write), 1}, { "no_splice_write", offsetof(struct fuse_ll, no_splice_write), 1}, { "splice_move", offsetof(struct fuse_ll, splice_move), 1}, @@ -2554,12 +2558,9 @@ static void fuse_ll_version(void) static void fuse_ll_help(void) { fprintf(stderr, -" -o max_write=N set maximum size of write requests\n" " -o max_readahead=N set 
maximum readahead\n" " -o max_background=N set number of maximum background requests\n" " -o congestion_threshold=N set kernel's congestion threshold\n" -" -o atomic_o_trunc enable atomic open+truncate support\n" -" -o big_writes enable larger than 4kB writes\n" " -o no_remote_lock disable remote file locking\n" " -o no_remote_flock disable remote file locking (BSD)\n" " -o no_remote_posix_lock disable remove file locking (POSIX)\n" @@ -2764,7 +2765,6 @@ struct fuse_session *fuse_lowlevel_new_common(struct fuse_args *args, f->conn.max_write = UINT_MAX; f->conn.max_readahead = UINT_MAX; - f->atomic_o_trunc = 0; list_init_req(&f->list); list_init_req(&f->interrupts); list_init_nreq(&f->notify_list); diff --git a/libfuse/lib/helper.c b/libfuse/lib/helper.c index 49d30f995..75bb291bc 100644 --- a/libfuse/lib/helper.c +++ b/libfuse/lib/helper.c @@ -28,23 +28,26 @@ enum { KEY_VERSION, }; -struct helper_opts { - int singlethread; - int foreground; - int nodefault_subtype; - char *mountpoint; +struct helper_opts +{ + int singlethread; + int foreground; + int nodefault_subtype; + char *mountpoint; }; #define FUSE_HELPER_OPT(t, p) { t, offsetof(struct helper_opts, p), 1 } -static const struct fuse_opt fuse_helper_opts[] = { +static +const +struct fuse_opt fuse_helper_opts[] = + { FUSE_HELPER_OPT("-d", foreground), FUSE_HELPER_OPT("debug", foreground), FUSE_HELPER_OPT("-f", foreground), FUSE_HELPER_OPT("-s", singlethread), FUSE_HELPER_OPT("fsname=", nodefault_subtype), FUSE_HELPER_OPT("subtype=", nodefault_subtype), - FUSE_OPT_KEY("-h", KEY_HELP), FUSE_OPT_KEY("--help", KEY_HELP), FUSE_OPT_KEY("-ho", KEY_HELP_NOHEADER), @@ -144,37 +147,46 @@ static int add_default_subtype(const char *progname, struct fuse_args *args) return res; } -int fuse_parse_cmdline(struct fuse_args *args, char **mountpoint, - int *multithreaded, int *foreground) +int +fuse_parse_cmdline(struct fuse_args *args_, + char **mountpoint_, + int *multithreaded_, + int *foreground_) { - int res; - struct 
helper_opts hopts; - - memset(&hopts, 0, sizeof(hopts)); - res = fuse_opt_parse(args, &hopts, fuse_helper_opts, - fuse_helper_opt_proc); - if (res == -1) - return -1; - - if (!hopts.nodefault_subtype) { - res = add_default_subtype(args->argv[0], args); - if (res == -1) - goto err; - } - if (mountpoint) - *mountpoint = hopts.mountpoint; - else - free(hopts.mountpoint); - - if (multithreaded) - *multithreaded = !hopts.singlethread; - if (foreground) - *foreground = hopts.foreground; - return 0; - -err: - free(hopts.mountpoint); - return -1; + int res; + struct helper_opts hopts; + + memset(&hopts, 0, sizeof(hopts)); + + res = fuse_opt_parse(args_, + &hopts, + fuse_helper_opts, + fuse_helper_opt_proc); + if(res == -1) + return -1; + + if(!hopts.nodefault_subtype) + { + res = add_default_subtype(args_->argv[0], args_); + if(res == -1) + goto err; + } + + if(mountpoint_) + *mountpoint_ = hopts.mountpoint; + else + free(hopts.mountpoint); + + if(multithreaded_) + *multithreaded_ = !hopts.singlethread; + if(foreground_) + *foreground_ = hopts.foreground; + + return 0; + + err: + free(hopts.mountpoint); + return -1; } int fuse_daemonize(int foreground) @@ -229,36 +241,41 @@ int fuse_daemonize(int foreground) return 0; } -static struct fuse_chan *fuse_mount_common(const char *mountpoint, - struct fuse_args *args) +static +struct fuse_chan * +fuse_mount_common(const char *mountpoint_, + struct fuse_args *args_) { - struct fuse_chan *ch; - int fd; - - /* - * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos - * would ensue. - */ - do { - fd = open("/dev/null", O_RDWR); - if (fd > 2) - close(fd); - } while (fd >= 0 && fd <= 2); - - fd = fuse_mount_compat25(mountpoint, args); - if (fd == -1) - return NULL; - - ch = fuse_kern_chan_new(fd); - if (!ch) - fuse_kern_unmount(mountpoint, fd); - - return ch; + struct fuse_chan *ch; + int fd; + + /* + * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos + * would ensue. 
+ */ + do + { + fd = open("/dev/null", O_RDWR); + if(fd > 2) + close(fd); + } while(fd >= 0 && fd <= 2); + + fd = fuse_mount_compat25(mountpoint_, args_); + if(fd == -1) + return NULL; + + ch = fuse_kern_chan_new(fd); + if(!ch) + fuse_kern_unmount(mountpoint_, fd); + + return ch; } -struct fuse_chan *fuse_mount(const char *mountpoint, struct fuse_args *args) +struct fuse_chan * +fuse_mount(const char *mountpoint_, + struct fuse_args *args_) { - return fuse_mount_common(mountpoint, args); + return fuse_mount_common(mountpoint_,args_); } static void fuse_unmount_common(const char *mountpoint, struct fuse_chan *ch) diff --git a/man/mergerfs.1 b/man/mergerfs.1 index 3de97533f..2ad43df61 100644 --- a/man/mergerfs.1 +++ b/man/mergerfs.1 @@ -1,7 +1,7 @@ .\"t .\" Automatically generated by Pandoc 1.19.2.4 .\" -.TH "mergerfs" "1" "2019\-05\-23" "mergerfs user manual" "" +.TH "mergerfs" "1" "2019\-06\-03" "mergerfs user manual" "" .hy .SH NAME .PP @@ -86,16 +86,16 @@ so you can mix read\-write and read\-only drives. one which ran mergerfs to see the filesystem. This is required for most use\-cases. .IP \[bu] 2 -\f[B]minfreespace=value\f[]: The minimum space value used for creation +\f[B]minfreespace=SIZE\f[]: The minimum space value used for creation policies. Understands \[aq]K\[aq], \[aq]M\[aq], and \[aq]G\[aq] to represent kilobyte, megabyte, and gigabyte respectively. (default: 4G) .IP \[bu] 2 -\f[B]moveonenospc=true|false\f[]: When enabled if a \f[B]write\f[] fails -with \f[B]ENOSPC\f[] or \f[B]EDQUOT\f[] a scan of all drives will be -done looking for the drive with the most free space which is at least -the size of the file plus the amount which failed to write. +\f[B]moveonenospc=BOOL\f[]: When enabled if a \f[B]write\f[] fails with +\f[B]ENOSPC\f[] or \f[B]EDQUOT\f[] a scan of all drives will be done +looking for the drive with the most free space which is at least the +size of the file plus the amount which failed to write. 
An attempt to move the file to that drive will occur (keeping all metadata possible) and if successful the original is unlinked and the write retried. @@ -106,29 +106,29 @@ than libfuse. While not a default it is recommended it be enabled so that linked files share the same inode value. .IP \[bu] 2 -\f[B]dropcacheonclose=true|false\f[]: When a file is requested to be -closed call \f[C]posix_fadvise\f[] on it first to instruct the kernel -that we no longer need the data and it can drop its cache. +\f[B]dropcacheonclose=BOOL\f[]: When a file is requested to be closed +call \f[C]posix_fadvise\f[] on it first to instruct the kernel that we +no longer need the data and it can drop its cache. Recommended when \f[B]cache.files=partial|full|auto\-full\f[] to limit double caching. (default: false) .IP \[bu] 2 -\f[B]symlinkify=true|false\f[]: When enabled and a file is not writable -and its mtime or ctime is older than \f[B]symlinkify_timeout\f[] files -will be reported as symlinks to the original files. +\f[B]symlinkify=BOOL\f[]: When enabled and a file is not writable and +its mtime or ctime is older than \f[B]symlinkify_timeout\f[] files will +be reported as symlinks to the original files. Please read more below before using. (default: false) .IP \[bu] 2 -\f[B]symlinkify_timeout=value\f[]: Time to wait, in seconds, to activate +\f[B]symlinkify_timeout=INT\f[]: Time to wait, in seconds, to activate the \f[B]symlinkify\f[] behavior. (default: 3600) .IP \[bu] 2 -\f[B]nullrw=true|false\f[]: Turns reads and writes into no\-ops. +\f[B]nullrw=BOOL\f[]: Turns reads and writes into no\-ops. The request will succeed but do nothing. Useful for benchmarking mergerfs. (default: false) .IP \[bu] 2 -\f[B]ignorepponrename=true|false\f[]: Ignore path preserving on rename. +\f[B]ignorepponrename=BOOL\f[]: Ignore path preserving on rename. Typically rename and link act differently depending on the policy of \f[C]create\f[] (read below). 
Enabling this will cause rename and link to always use the non\-path @@ -136,8 +136,8 @@ preserving behavior. This means files, when renamed or linked, will stay on the same drive. (default: false) .IP \[bu] 2 -\f[B]security_capability=true|false\f[]: If false return ENOATTR when -xattr security.capability is queried. +\f[B]security_capability=BOOL\f[]: If false return ENOATTR when xattr +security.capability is queried. (default: true) .IP \[bu] 2 \f[B]xattr=passthrough|noattr|nosys\f[]: Runtime control of xattrs. @@ -147,8 +147,8 @@ Default is to passthrough xattr requests. or disabled. (default: passthrough) .IP \[bu] 2 -\f[B]link_cow=true|false\f[]: When enabled if a regular file is opened -which has a link count > 1 it will copy the file to a temporary file and +\f[B]link_cow=BOOL\f[]: When enabled if a regular file is opened which +has a link count > 1 it will copy the file to a temporary file and rename over the original. Breaking the link and providing a basic copy\-on\-write function similar to cow\-shell. @@ -168,17 +168,22 @@ calculations to ignore available space for branches mounted or tagged as create\[aq]. (default: none) .IP \[bu] 2 -\f[B]posix_acl=true|false:\f[] Enable POSIX ACL support (if supported by +\f[B]posix_acl=BOOL\f[]: Enable POSIX ACL support (if supported by kernel and underlying filesystem). (default: false) .IP \[bu] 2 -\f[B]async_read=true|false:\f[] Perform reads asynchronously. +\f[B]async_read=BOOL\f[]: Perform reads asynchronously. If disabled or unavailable the kernel will ensure there is at most one pending read request per file handle and will attempt to order requests by offset. (default: true) .IP \[bu] 2 -\f[B]threads=num\f[]: Number of threads to use in multithreaded mode. +\f[B]fuse_msg_size=INT\f[]: Set the max number of pages per FUSE +message. +Only available on Linux >= 4.20 and ignored otherwise. +(min: 1; max: 256; default: 256) +.IP \[bu] 2 +\f[B]threads=INT\f[]: Number of threads to use in multithreaded mode. 
When set to zero it will attempt to discover and use the number of logical cores. If the lookup fails it will fall back to using 4. @@ -191,44 +196,42 @@ NOTE: higher number of threads increases parallelism but usually decreases throughput. (default: 0) .IP \[bu] 2 -\f[B]fsname=name\f[]: Sets the name of the filesystem as seen in +\f[B]fsname=STR\f[]: Sets the name of the filesystem as seen in \f[B]mount\f[], \f[B]df\f[], etc. Defaults to a list of the source paths concatenated together with the longest common prefix removed. .IP \[bu] 2 -\f[B]func.=\f[]: Sets the specific FUSE function\[aq]s -policy. +\f[B]func.FUNC=POLICY\f[]: Sets the specific FUSE function\[aq]s policy. See below for the list of value types. Example: \f[B]func.getattr=newest\f[] .IP \[bu] 2 -\f[B]category.=\f[]: Sets policy of all FUSE functions -in the provided category. +\f[B]category.CATEGORY=POLICY\f[]: Sets policy of all FUSE functions in +the provided category. Example: \f[B]category.create=mfs\f[] .IP \[bu] 2 -\f[B]cache.open=\f[]: \[aq]open\[aq] policy cache timeout in -seconds. +\f[B]cache.open=INT\f[]: \[aq]open\[aq] policy cache timeout in seconds. (default: 0) .IP \[bu] 2 -\f[B]cache.statfs=\f[]: \[aq]statfs\[aq] cache timeout in seconds. +\f[B]cache.statfs=INT\f[]: \[aq]statfs\[aq] cache timeout in seconds. (default: 0) .IP \[bu] 2 -\f[B]cache.attr=\f[]: File attribute cache timeout in seconds. +\f[B]cache.attr=INT\f[]: File attribute cache timeout in seconds. (default: 1) .IP \[bu] 2 -\f[B]cache.entry=\f[]: File name lookup cache timeout in seconds. +\f[B]cache.entry=INT\f[]: File name lookup cache timeout in seconds. (default: 1) .IP \[bu] 2 -\f[B]cache.negative_entry=\f[]: Negative file name lookup cache +\f[B]cache.negative_entry=INT\f[]: Negative file name lookup cache timeout in seconds. 
(default: 0) .IP \[bu] 2 \f[B]cache.files=libfuse|off|partial|full|auto\-full\f[]: File page caching mode (default: libfuse) .IP \[bu] 2 -\f[B]cache.symlinks=\f[]: Cache symlinks (if supported by kernel) +\f[B]cache.symlinks=BOOL\f[]: Cache symlinks (if supported by kernel) (default: false) .IP \[bu] 2 -\f[B]cache.readdir=\f[]: Cache readdir (if supported by kernel) +\f[B]cache.readdir=BOOL\f[]: Cache readdir (if supported by kernel) (default: false) .IP \[bu] 2 \f[B]direct_io\f[]: deprecated \- Bypass page cache. @@ -255,6 +258,22 @@ Use \f[C]async_read=false\f[] instead. options are \f[B]func.rmdir=rand,category.action=ff\f[] the \f[B]action\f[] category setting will override the \f[B]rmdir\f[] setting. +.SS Value Types +.IP \[bu] 2 +BOOL = \[aq]true\[aq] | \[aq]false\[aq] +.IP \[bu] 2 +INT = [0,MAX_INT] +.IP \[bu] 2 +SIZE = \[aq]NNM\[aq]; NN = INT, M = \[aq]K\[aq] | \[aq]M\[aq] | +\[aq]G\[aq] | \[aq]T\[aq] +.IP \[bu] 2 +STR = string +.IP \[bu] 2 +FUNC = FUSE function +.IP \[bu] 2 +CATEGORY = FUSE function category +.IP \[bu] 2 +POLICY = mergerfs function policy .SS branches .PP The \[aq]branches\[aq] (formerly \[aq]srcmounts\[aq]) argument is a @@ -310,6 +329,39 @@ be automatically included. \f[B]NOTE:\f[] for mounting via \f[B]fstab\f[] to work you must have \f[B]mount.fuse\f[] installed. For Ubuntu/Debian it is included in the \f[B]fuse\f[] package. +.SS fuse_msg_size +.PP +FUSE applications communicate with the kernel over a special character +device: \f[C]/dev/fuse\f[]. +A large portion of the overhead associated with FUSE is the cost of +going back and forth from user space and kernel space over that device. +Generally speaking the fewer trips needed the better the performance +will be. +Reducing the number of trips can be done a number of ways. +Kernel level caching and increasing message sizes being two significant +ones. +When it comes to reads and writes if the message size is doubled the +number of trips is approximately halved. 
+.PP +In Linux 4.20 a new feature was added allowing the negotiation of the +max message size. +Since the size is in multiples of +pages (https://en.wikipedia.org/wiki/Page_(computer_memory)) the feature +is called \f[C]max_pages\f[]. +There is a maximum \f[C]max_pages\f[] value of 256 (1MiB) and minimum of +1 (4KiB). +The default used by Linux >=4.20, and hardcoded value used before 4.20, +is 32 (128KiB). +In mergerfs its referred to as \f[C]fuse_msg_size\f[] to make it clear +what it impacts and provide some abstraction. +.PP +Since there should be no downsides to increasing \f[C]fuse_msg_size\f[] +/ \f[C]max_pages\f[], outside a minor bump in RAM usage due to larger +message buffers, mergerfs defaults the value to 256. +On kernels before 4.20 the value has no effect. +The reason the value is configurable is to enable experimentation and +benchmarking. +See the \f[C]nullrw\f[] section for benchmarking examples. .SS symlinkify .PP Due to the levels of indirection introduced by mergerfs and the @@ -353,22 +405,22 @@ Example: .IP .nf \f[C] -$\ dd\ if=/dev/zero\ of=/path/to/mergerfs/mount/benchmark\ ibs=1M\ obs=512\ count=1024\ conv=fdatasync +$\ dd\ if=/dev/zero\ of=/path/to/mergerfs/mount/benchmark\ ibs=1M\ obs=512\ count=1024\ iflag=dsync,nocache\ oflag=dsync,nocache\ conv=fdatasync\ status=progress 1024+0\ records\ in 2097152+0\ records\ out 1073741824\ bytes\ (1.1\ GB,\ 1.0\ GiB)\ copied,\ 15.4067\ s,\ 69.7\ MB/s -$\ dd\ if=/dev/zero\ of=/path/to/mergerfs/mount/benchmark\ ibs=1M\ obs=1M\ count=1024\ conv=fdatasync +$\ dd\ if=/dev/zero\ of=/path/to/mergerfs/mount/benchmark\ ibs=1M\ obs=1M\ count=1024\ iflag=dsync,nocache\ oflag=dsync,nocache\ conv=fdatasync\ status=progress 1024+0\ records\ in 1024+0\ records\ out 1073741824\ bytes\ (1.1\ GB,\ 1.0\ GiB)\ copied,\ 0.219585\ s,\ 4.9\ GB/s -$\ dd\ if=/path/to/mergerfs/mount/benchmark\ of=/dev/null\ bs=512\ count=102400\ conv=fdatasync +$\ dd\ if=/path/to/mergerfs/mount/benchmark\ of=/dev/null\ bs=512\ count=102400\ 
iflag=dsync,nocache\ oflag=dsync,nocache\ conv=fdatasync\ status=progress 102400+0\ records\ in 102400+0\ records\ out 52428800\ bytes\ (52\ MB,\ 50\ MiB)\ copied,\ 0.757991\ s,\ 69.2\ MB/s -$\ dd\ if=/path/to/mergerfs/mount/benchmark\ of=/dev/null\ bs=1M\ count=1024\ conv=fdatasync +$\ dd\ if=/path/to/mergerfs/mount/benchmark\ of=/dev/null\ bs=1M\ count=1024\ iflag=dsync,nocache\ oflag=dsync,nocache\ conv=fdatasync\ status=progress 1024+0\ records\ in 1024+0\ records\ out 1073741824\ bytes\ (1.1\ GB,\ 1.0\ GiB)\ copied,\ 0.18405\ s,\ 5.8\ GB/s @@ -883,6 +935,10 @@ wherever you configure the mounting of mergerfs (/etc/fstab). Use \f[C]xattr\ \-l\ /mountpoint/.mergerfs\f[] to see all supported keys. Some are informational and therefore read\-only. +\f[C]setxattr\f[] will return EINVAL on read\-only keys. +.SS Values +.PP +Same as the command line. .SS user.mergerfs.branches .PP \f[B]NOTE:\f[] formerly \f[C]user.mergerfs.srcmounts\f[] but said key is @@ -937,30 +993,12 @@ T} .PP The \f[C]=NC\f[], \f[C]=RO\f[], \f[C]=RW\f[] syntax works just as on the command line. -.SS minfreespace -.PP -Input: interger with an optional multiplier suffix. -\f[B]K\f[], \f[B]M\f[], or \f[B]G\f[]. -.PP -Output: value in bytes -.SS moveonenospc -.PP -Input: \f[B]true\f[] and \f[B]false\f[] -.PP -Ouput: \f[B]true\f[] or \f[B]false\f[] -.SS categories / funcs -.PP -Input: short policy string as described elsewhere in this document -.PP -Output: the policy string except for categories where its funcs have -multiple types. -In that case it will be a comma separated list .SS Example .IP .nf \f[C] [trapexit:/mnt/mergerfs]\ $\ xattr\ \-l\ .mergerfs -user.mergerfs.branches:\ /mnt/a:/mnt/b +user.mergerfs.branches:\ /mnt/a=RW:/mnt/b=RW user.mergerfs.minfreespace:\ 4294967295 user.mergerfs.moveonenospc:\ false \&... @@ -994,16 +1032,16 @@ served. 
To access the values you will need to issue a getxattr (http://linux.die.net/man/2/getxattr) for one of the following: .IP \[bu] 2 -\f[B]user.mergerfs.basepath:\f[] the base mount point for the file given +\f[B]user.mergerfs.basepath\f[]: the base mount point for the file given the current getattr policy .IP \[bu] 2 -\f[B]user.mergerfs.relpath:\f[] the relative path of the file from the +\f[B]user.mergerfs.relpath\f[]: the relative path of the file from the perspective of the mount point .IP \[bu] 2 -\f[B]user.mergerfs.fullpath:\f[] the full path of the original file +\f[B]user.mergerfs.fullpath\f[]: the full path of the original file given the getattr policy .IP \[bu] 2 -\f[B]user.mergerfs.allpaths:\f[] a NUL (\[aq]\[aq]) separated list of +\f[B]user.mergerfs.allpaths\f[]: a NUL (\[aq]\[aq]) separated list of full paths to all files found .IP .nf @@ -1192,13 +1230,10 @@ writeback caching is a technique for improving write speeds by batching writes at a faster device and then bulk writing to the slower device. With FUSE the kernel will wait for a number of writes to be made and then send it to the filesystem as one request. -mergerfs currently uses a slightly modified and vendored libfuse 2.9.7 -which does not support writeback caching. -However, a prototype port to libfuse 3.x has been made and the writeback -cache appears to work as expected (though performance improvements -greatly depend on the way the client app writes data). -Once the port is complete and thoroughly tested writeback caching will -be available. +mergerfs currently uses a modified and vendored libfuse 2.9.7 which does +not support writeback caching. +Adding said feature should not be difficult but benchmarking needs to be +done to see what effect it will have. .SS tiered caching .PP Some storage technologies support what some call "tiered" caching. 
diff --git a/src/config.cpp b/src/config.cpp index d04d0da6d..22f7c8456 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -53,6 +53,7 @@ Config::Config() cache_readdir(false), async_read(true), cache_files(CacheFiles::LIBFUSE), + fuse_msg_size(FUSE_MAX_MAX_PAGES), POLICYINIT(access), POLICYINIT(chmod), POLICYINIT(chown), diff --git a/src/config.hpp b/src/config.hpp index 9d9454495..a95061733 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -113,6 +113,7 @@ class Config bool cache_readdir; bool async_read; CacheFiles cache_files; + uint16_t fuse_msg_size; public: const Policy *policies[FuseFunc::Enum::END]; diff --git a/src/fuse_getxattr.cpp b/src/fuse_getxattr.cpp index 15de597cc..9bfff730a 100644 --- a/src/fuse_getxattr.cpp +++ b/src/fuse_getxattr.cpp @@ -233,6 +233,18 @@ namespace l attrvalue_ = (string)cache_files_; } + static + void + getxattr_controlfile(const uint16_t &uint16_, + string &attrvalue_) + { + std::ostringstream os; + + os << uint16_; + + attrvalue_ = os.str(); + } + static void getxattr_controlfile_policies(const Config &config, @@ -358,6 +370,8 @@ namespace l l::getxattr_controlfile_bool(config.posix_acl,attrvalue); else if(attr[2] == "async_read") l::getxattr_controlfile_bool(config.async_read,attrvalue); + else if(attr[2] == "fuse_msg_size") + l::getxattr_controlfile(config.fuse_msg_size,attrvalue); break; case 4: diff --git a/src/fuse_init.cpp b/src/fuse_init.cpp index 72881cf45..5bfb91c33 100644 --- a/src/fuse_init.cpp +++ b/src/fuse_init.cpp @@ -60,6 +60,22 @@ namespace l *want_ = false; } + + static + void + want_if_capable_max_pages(fuse_conn_info *conn_, + Config &c_) + { + if(l::capable(conn_,FUSE_CAP_MAX_PAGES)) + { + l::want(conn_,FUSE_CAP_MAX_PAGES); + conn_->max_pages = c_.fuse_msg_size; + } + else + { + c_.fuse_msg_size = FUSE_DEFAULT_MAX_PAGES_PER_REQ; + } + } } namespace FUSE @@ -80,6 +96,7 @@ namespace FUSE l::want_if_capable(conn_,FUSE_CAP_IOCTL_DIR); l::want_if_capable(conn_,FUSE_CAP_PARALLEL_DIROPS); 
l::want_if_capable(conn_,FUSE_CAP_POSIX_ACL,&c.posix_acl); + l::want_if_capable_max_pages(conn_,c); return &c; } diff --git a/src/fuse_listxattr.cpp b/src/fuse_listxattr.cpp index 837e7f343..32a0fcbb1 100644 --- a/src/fuse_listxattr.cpp +++ b/src/fuse_listxattr.cpp @@ -56,6 +56,7 @@ namespace l ("user.mergerfs.cache.symlinks") ("user.mergerfs.direct_io") ("user.mergerfs.dropcacheonclose") + ("user.mergerfs.fuse_msg_size") ("user.mergerfs.ignorepponrename") ("user.mergerfs.link_cow") ("user.mergerfs.minfreespace") diff --git a/src/option_parser.cpp b/src/option_parser.cpp index a872cef93..c5692ac66 100644 --- a/src/option_parser.cpp +++ b/src/option_parser.cpp @@ -102,6 +102,28 @@ set_default_options(fuse_args *args) set_option(args,"default_permissions"); } +static +int +parse_and_process(const std::string &value_, + uint16_t &uint16_, + uint16_t min_, + uint16_t max_) +{ + int rv; + uint64_t uint64; + + rv = num::to_uint64_t(value_,uint64); + if(rv == -1) + return 1; + + if((uint64 > max_) || (uint64 < min_)) + return 1; + + uint16_ = uint64; + + return 0; +} + static int parse_and_process(const std::string &value_, @@ -345,6 +367,10 @@ parse_and_process_kv_arg(Config &config, rv = parse_and_process(value,config.auto_cache); else if(key == "async_read") rv = parse_and_process(value,config.async_read); + else if(key == "fuse_msg_size") + rv = parse_and_process(value,config.fuse_msg_size, + 1, + FUSE_MAX_MAX_PAGES); } if(rv == -1)