Skip to content

Commit

Permalink
Merge pull request #276 from wahtari/copy_file_range
Browse files Browse the repository at this point in the history
Added copy_file_range for faster file copy in mkcomposefs.
  • Loading branch information
alexlarsson committed Apr 25, 2024
2 parents e78c7a4 + 3e38d73 commit ecef20c
Showing 1 changed file with 118 additions and 14 deletions.
132 changes: 118 additions & 14 deletions tools/mkcomposefs.c
Original file line number Diff line number Diff line change
Expand Up @@ -782,9 +782,27 @@ static int write_to_fd(int fd, const char *content, ssize_t len)

return 0;
}
static pthread_mutex_t mutex_thread_access = PTHREAD_MUTEX_INITIALIZER;
static bool try_copy_file_range = true;
static bool is_copy_file_range_available(void)
{
bool ret = true;
pthread_mutex_lock(&mutex_thread_access);
ret = try_copy_file_range;
pthread_mutex_unlock(&mutex_thread_access);

return ret;
}

static void disable_copy_file_range(void)
{
pthread_mutex_lock(&mutex_thread_access);
try_copy_file_range = false;
pthread_mutex_unlock(&mutex_thread_access);
}

#define BUFSIZE 8192
static int copy_file_data(int sfd, int dfd)
static int copy_file_data_classic(int sfd, int dfd)
{
char buffer[BUFSIZE];
ssize_t bytes_read;
Expand All @@ -807,6 +825,97 @@ static int copy_file_data(int sfd, int dfd)
return 0;
}

static int copy_file_data_range(int sfd, int dfd)
{
struct stat stat;

if (fstat(sfd, &stat) == -1)
return -1;

off_t len, ret;
len = stat.st_size;

if (len == 0)
return 0;

do {
ret = copy_file_range(sfd, NULL, dfd, NULL, len, 0);
if (ret < 0 && errno == EINTR)
continue;
if (ret == -1)
return -1;
// This is an implementation problem in copy_file_range. Handle it and return error so that classic copy can be retried
if (ret == 0 && len > 0) {
// Setting this error code to trigger a classic copy
// https://github.com/rust-lang/rust/blob/0e5f5207881066973486e6a480fa46cfa22947e9/library/std/src/sys/pal/unix/kernel_copy.rs#L622
// fallback to work around several kernel bugs where copy_file_range will fail to
// copy any bytes and return 0 instead of an error if
// - reading virtual files from the proc filesystem which appear to have 0 size
// but are not empty. noted in coreutils to affect kernels at least up to 5.6.19.
// - copying from an overlay filesystem in docker. reported to occur on fedora 32.
errno = EINVAL; // EINVAL Either fd_in or fd_out is not a regular file.
return -1;
}
if (ret == 0)
break;

len -= ret;
} while (len > 0 && ret > 0);

return 0;
}

static int copy_file_data(int sfd, int dfd)
{
bool use_copy_classic = !is_copy_file_range_available();
// https://github.com/rust-lang/rust/blob/0e5f5207881066973486e6a480fa46cfa22947e9/library/std/src/sys/pal/unix/kernel_copy.rs#L622
// https://gitlab.gnome.org/GNOME/libglnx/-/blob/202b294e6079e23242e65e0426f8639841d1210b/glnx-fdio.c#L846
// https://github.com/systemd/systemd/blob/e71b40fd0026c0884ca26eb4f0a9fbe4d9285cfa/src/shared/copy.c#L338
// https://lwn.net/Articles/846403/
int ret = -1;
if (!use_copy_classic) {
ret = copy_file_data_range(sfd, dfd);
// Write was successful
if (0 == ret)
return 0;

// https://github.com/rust-lang/rust/blob/0e5f5207881066973486e6a480fa46cfa22947e9/library/std/src/sys/pal/unix/kernel_copy.rs#L622
// Try fallback io::copy if either:
// - Kernel version is < 4.5 (ENOSYS¹)
// - Files are mounted on different fs (EXDEV)
// - copy_file_range is broken in various ways on RHEL/CentOS 7 (EOPNOTSUPP)
// - copy_file_range file is immutable or syscall is blocked by seccomp¹ (EPERM)
// - copy_file_range cannot be used with pipes or device nodes (EINVAL)
// - the writer fd was opened with O_APPEND (EBADF²)
// and no bytes were written successfully yet. (All these errnos should
// not be returned if something was already written, but they happen in
// the wild, see #91152.)
//
// ¹ these cases should be detected by the initial probe but we handle them here
// anyway in case syscall interception changes during runtime
// ² actually invalid file descriptors would cause this too, but in that case
// the fallback code path is expected to encounter the same error again

// Disable copy file range for the entire run because,
// the rest of the files as part of this run will also have the similar file system.
if (ret < 0 && (errno == ENOSYS || errno == EXDEV)) {
disable_copy_file_range();
use_copy_classic = true;
}

// Try classic for this file but copy_file_range could work for the next file.
if (ret < 0 && (errno == EOPNOTSUPP || errno == EPERM ||
errno == EINVAL || errno == EBADF)) {
use_copy_classic = true;
}
}

if (use_copy_classic) {
ret = copy_file_data_classic(sfd, dfd);
}
return ret;
}

static int copy_file_with_dirs_if_needed(const char *src, const char *dst_base,
const char *dst, bool try_enable_fsverity)
{
Expand Down Expand Up @@ -1020,7 +1129,7 @@ static int construct_compute_data(struct lcfs_node_s *node,
}

struct work_item_iterator {
pthread_mutex_t mutex_node_iterator;
pthread_mutex_t *mutex_node_iterator;
int current_item;
int errorcode;
bool cancel_request;
Expand All @@ -1035,26 +1144,26 @@ static struct work_item *get_next_work_item(struct work_collection *collection,
bool cancel = false;
struct work_item *ret = NULL;

pthread_mutex_lock(&(iterator->mutex_node_iterator));
pthread_mutex_lock(iterator->mutex_node_iterator);
if (iterator->cancel_request)
cancel = true;
else if (iterator->current_item < collection->count) {
ret = &(collection->items[iterator->current_item]);
iterator->current_item++;
}
pthread_mutex_unlock(&(iterator->mutex_node_iterator));
pthread_mutex_unlock(iterator->mutex_node_iterator);
return cancel ? NULL : ret;
}

static void request_cancel(struct work_item_iterator *iterator, int errorcode)
{
pthread_mutex_lock(&(iterator->mutex_node_iterator));
pthread_mutex_lock(iterator->mutex_node_iterator);
// Record only the first cancels error code
if (!iterator->cancel_request) {
iterator->cancel_request = true;
iterator->errorcode = errorcode;
}
pthread_mutex_unlock(&(iterator->mutex_node_iterator));
pthread_mutex_unlock(iterator->mutex_node_iterator);
}

typedef int (*THREAD_PROCESS_PROC)(struct work_item *, void *);
Expand Down Expand Up @@ -1109,12 +1218,7 @@ static int execute_in_threads(const int requested_threads,
THREAD_PROCESS_PROC proc, void *data)
{
struct work_item_iterator iterator;
int ret = pthread_mutex_init(&iterator.mutex_node_iterator, NULL);
if (0 != ret) {
errno = ret;
return -1;
}

iterator.mutex_node_iterator = &mutex_thread_access;
iterator.current_item = 0;
iterator.errorcode = 0;
iterator.cancel_request = false;
Expand All @@ -1125,6 +1229,7 @@ static int execute_in_threads(const int requested_threads,
thread_info.collection = collection;
thread_info.iterator = &iterator;

int ret = -1;
cleanup_free pthread_t *threads = NULL;
const int thread_count = requested_threads - 1;
if (thread_count >= 1) {
Expand Down Expand Up @@ -1201,7 +1306,6 @@ static int fill_store(const int thread_count, struct lcfs_node_s *node,
int ret = execute_in_threads(thread_count, &collection, process_copy,
(void *)digest_store_path);
cleanup_work_items(&collection);

return ret;
}

Expand Down Expand Up @@ -1239,7 +1343,7 @@ static void usage(const char *argv0)
" --from-file The source is a dump file, not a directory\n"
" --min-version=N Use this minimal format version (default=%d)\n"
" --max-version=N Use this maxium format version (default=%d)\n"
" --threads=N Use this to calculate digest and copy files in threads (default=%d)\n",
" --threads=N Use this to override the default number of threads used to calculate digest and copy files (default=%d)\n",
bin, LCFS_DEFAULT_VERSION_MIN, LCFS_DEFAULT_VERSION_MAX,
get_cpu_count());
}
Expand Down

0 comments on commit ecef20c

Please sign in to comment.