From 5749cb9c8e1857d95c5f65d3a009418d3c130778 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 21 Dec 2015 12:06:29 +0100 Subject: [PATCH] Add support for btrfs deduplication This adds an option -B (or --dedupe) to fdupes which issues the BTRFS_IOC_FILE_EXTENT_SAME ioctl on files that it detects as duplicates, causing btrfs to deduplicate the data on disk. This build option needs to be turned on separately in the Makefile by setting the HAVE_BTRFS_IOCTL_H preprocessor macro to not break builds missing this header. Also fixes a memory leak in escapefilename(). --- Makefile | 7 +- fdupes.1 | 5 ++ fdupes.c | 208 ++++++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 186 insertions(+), 34 deletions(-) diff --git a/Makefile b/Makefile index ed5498e..cbe2f38 100644 --- a/Makefile +++ b/Makefile @@ -25,6 +25,11 @@ FILEOFFSET_64BIT = -D_FILE_OFFSET_BITS=64 # #OMIT_GETOPT_LONG = -DOMIT_GETOPT_LONG +# +# Whether we have btrfs/ioctl.h. Needed for --dedupe. +# +# HAVE_BTRFS_IOCTL_H = -DHAVE_BTRFS_IOCTL_H + ##################################################################### # Developer Configuration Section # ##################################################################### @@ -73,7 +78,7 @@ MKDIR = mkdir -p CC ?= gcc COMPILER_OPTIONS = -Wall -O -g -CFLAGS= $(COMPILER_OPTIONS) -I. -DVERSION=\"$(VERSION)\" $(OMIT_GETOPT_LONG) $(FILEOFFSET_64BIT) +CFLAGS= $(COMPILER_OPTIONS) -I. -DVERSION=\"$(VERSION)\" $(OMIT_GETOPT_LONG) $(FILEOFFSET_64BIT) $(HAVE_BTRFS_IOCTL_H) INSTALL_PROGRAM = $(INSTALL) -c -m 0755 INSTALL_DATA = $(INSTALL) -c -m 0644 diff --git a/fdupes.1 b/fdupes.1 index bdad958..b4129d5 100644 --- a/fdupes.1 +++ b/fdupes.1 @@ -59,6 +59,11 @@ prompt user for files to preserve, deleting all others (see .B CAVEATS below) .TP +.B -B --dedupe +issue the btrfs same-extents ioctl to trigger a deduplication on +disk. 
fdupes needs to be built with btrfs support for this option +to be available +.TP .B -N --noprompt when used together with \-\-delete, preserve the first file in each set of duplicates and delete the others without prompting the user diff --git a/fdupes.c b/fdupes.c index 61f0c55..1f0278a 100644 --- a/fdupes.c +++ b/fdupes.c @@ -33,6 +33,13 @@ #include #include +#ifdef HAVE_BTRFS_IOCTL_H +#include +#include +#include +#include +#endif + #include "md5/md5.h" #define ISFLAG(a,b) ((a & b) == b) @@ -53,6 +60,7 @@ #define F_EXCLUDEHIDDEN 0x1000 #define F_PERMISSIONS 0x2000 #define F_REVERSE 0x4000 +#define F_DEDUPEFILES 0x8000 typedef enum { ORDER_TIME = 0, @@ -146,6 +154,7 @@ void escapefilename(char *escape_list, char **filename_ptr) } strcpy(*filename_ptr, tmp); } + free(tmp); } off_t filesize(char *filename) { @@ -713,13 +722,147 @@ int relink(char *oldfile, char *newfile) return 1; } +void get_max_dupes(file_t *files, int *max, int *n_files) { /* Walks the next chain of files. For every entry with hasdupes set it counts the entry itself plus each node on its duplicates chain and stores the largest such count in *max (0 when no entry has duplicates). When n_files is non-NULL it receives the number of hasdupes entries whose size is non-zero; pass NULL to skip that count. */ + file_t *curdupe; + int n_dupes; + + *max = 0; + if (n_files) + *n_files = 0; + + while (files) { + if (files->hasdupes) { + if (files->size && n_files) + (*n_files)++; + + n_dupes = 1; + + for (curdupe = files->duplicates; curdupe; curdupe = curdupe->duplicates) + n_dupes++; + + if (n_dupes > *max) + *max = n_dupes; + + } + + files = files->next; + } +} + +#ifdef HAVE_BTRFS_IOCTL_H +static char *dedupeerrstr(int err) { /* Turns a per-file status value from the dedupe ioctl into a message: BTRFS_SAME_DATA_DIFFERS maps to a fixed string, a negative value is negated and handed to strerror(), and any other value is formatted into a static buffer, so that last kind of result is overwritten by the next such call. */ + static char buf[1024]; + + if (err == BTRFS_SAME_DATA_DIFFERS) { + return "BTRFS_SAME_DATA_DIFFERS (data modified in the meantime?)"; + } else if (err < 0) { + return strerror(-err); + } else { + snprintf(buf, sizeof(buf), "Unknown error %d", err); + buf[sizeof(buf)-1] = '\0'; + return buf; + } +} + +void dedupefiles(file_t *files) +{ /* For every entry of files that has duplicates and a non-zero size: opens each duplicate read-only, then issues BTRFS_IOC_FILE_EXTENT_SAME on the entry itself with offset 0 and length files->size, and reports every non-zero per-duplicate status through errormsg(). Prints a progress counter to stderr unless F_HIDEPROGRESS is set. Calls exit(1) when the argument buffers cannot be allocated. NOTE(review): when max_dupes is 0 the malloc(0) below may legitimately return NULL and would then be reported as out of memory - confirm. */ + struct btrfs_ioctl_same_args *same; + char **dupe_filenames; /* maps to same->info indices */ + + file_t *curfile; + int n_dupes, max_dupes, cur_info; + int cur_file = 0, max_files; + + int fd; + int ret, status; + + get_max_dupes(files, &max_dupes, &max_files); + same =
calloc(sizeof(struct btrfs_ioctl_same_args) + + sizeof(struct btrfs_ioctl_same_extent_info) * max_dupes, 1); + dupe_filenames = malloc(max_dupes * sizeof(char *)); + if (!same || !dupe_filenames) { + errormsg("out of memory\n"); + exit(1); + } + + while (files) { + if (files->hasdupes && files->size) { + cur_file++; + if (!ISFLAG(flags, F_HIDEPROGRESS)) { + fprintf(stderr, "\rDedupe [%d/%d] %d%% ", cur_file, max_files, + cur_file*100 / max_files); + } + + cur_info = 0; /* fill same->info with one open descriptor per duplicate of this entry */ + for (curfile = files->duplicates; curfile; curfile = curfile->duplicates) { + dupe_filenames[cur_info] = curfile->d_name; + fd = open(curfile->d_name, O_RDONLY); + if (fd == -1) { + errormsg("Unable to open(\"%s\", O_RDONLY): %s\n", + curfile->d_name, strerror(errno)); + continue; /* cur_info is not advanced, so the next duplicate reuses this slot */ + } + + same->info[cur_info].fd = fd; + same->info[cur_info].logical_offset = 0; + cur_info++; + } + n_dupes = cur_info; + + same->logical_offset = 0; + same->length = files->size; + same->dest_count = n_dupes; + + fd = open(files->d_name, O_RDONLY); + if (fd == -1) { + errormsg("Unable to open(\"%s\", O_RDONLY): %s\n", files->d_name, + strerror(errno)); + goto cleanup; + } + + ret = ioctl(fd, BTRFS_IOC_FILE_EXTENT_SAME, same); /* NOTE(review): close() runs before errno is read in the ret == -1 branch below - confirm errno still belongs to the ioctl at that point */ + if (close(fd) == -1) + errormsg("Unable to close(\"%s\"): %s\n", files->d_name, strerror(errno)); + + if (ret == -1) { + errormsg("ioctl(\"%s\", BTRFS_IOC_FILE_EXTENT_SAME, [%d files]): %s\n", + files->d_name, n_dupes, strerror(errno)); + goto cleanup; + } + + for (cur_info = 0; cur_info < n_dupes; cur_info++) { + if ((status = same->info[cur_info].status) != 0) { + errormsg("Couldn't dedupe %s => %s: %s\n", files->d_name, + dupe_filenames[cur_info], dedupeerrstr(status)); + } + } + +cleanup: /* closes the n_dupes descriptors stored in same->info above. NOTE(review): the close message in this loop has no trailing newline, unlike the other errormsg calls in this function */ + for (cur_info = 0; cur_info < n_dupes; cur_info++) { + if (close(same->info[cur_info].fd) == -1) { + errormsg("Unable to close(\"%s\"): %s", dupe_filenames[cur_info], + strerror(errno)); + } + } + + } /* has dupes */ + + files = files->next; + } + + if (!ISFLAG(flags, F_HIDEPROGRESS)) + fprintf(stderr,
"\r%40s\r", " "); + free(same); + free(dupe_filenames); +} +#endif + void deletefiles(file_t *files, int prompt, FILE *tty) { int counter; int groups = 0; int curgroup = 0; file_t *tmpfile; - file_t *curfile; file_t **dupelist; int *preserve; char *preservestr; @@ -727,28 +870,11 @@ void deletefiles(file_t *files, int prompt, FILE *tty) char *tstr; int number; int sum; - int max = 0; + int max; int x; int i; - curfile = files; - - while (curfile) { - if (curfile->hasdupes) { - counter = 1; - groups++; - - tmpfile = curfile->duplicates; - while (tmpfile) { - counter++; - tmpfile = tmpfile->duplicates; - } - - if (counter > max) max = counter; - } - - curfile = curfile->next; - } + get_max_dupes(files, &max, NULL); /* NOTE(review): the loop removed above also incremented groups once per hasdupes entry; with NULL passed here nothing visible in this hunk updates groups any more - confirm against the rest of deletefiles */ max++; @@ -955,6 +1081,12 @@ void help_text() printf(" \twith -s or --symlinks, or when specifying a\n"); printf(" \tparticular directory more than once; refer to the\n"); printf(" \tfdupes documentation for additional information\n"); +#ifdef HAVE_BTRFS_IOCTL_H + printf(" -B --dedupe \tCall the btrfs same-extent ioctl to deduplicate\n"); + printf(" \tdata on disk\n"); +#else + printf(" -B --dedupe \tNot available in this build of fdupes\n"); +#endif /*printf(" -l --relink \t(description)\n");*/ printf(" -N --noprompt \ttogether with --delete, preserve the first file in\n"); printf(" \teach set of duplicates and delete the rest without\n"); @@ -1011,6 +1143,7 @@ int main(int argc, char **argv) { { "permissions", 0, 0, 'p' }, { "order", 1, 0, 'o' }, { "reverse", 0, 0, 'i' }, + { "dedupe", 0, 0, 'B' }, { 0, 0, 0, 0 } }; #define GETOPT getopt_long @@ -1022,7 +1155,7 @@ int main(int argc, char **argv) { oldargv = cloneargs(argc, argv); - while ((opt = GETOPT(argc, argv, "frRq1SsHlnAdvhNmpo:i" + while ((opt = GETOPT(argc, argv, "frRq1SsHlnAdvhNmpo:iB" #ifndef OMIT_GETOPT_LONG , long_options, NULL #endif @@ -1089,6 +1222,14 @@ int main(int argc, char **argv) { case 'i': SETFLAG(flags, F_REVERSE); break; + case 'B': +#ifdef HAVE_BTRFS_IOCTL_H + SETFLAG(flags,
F_DEDUPEFILES); +#else + errormsg("This version of fdupes was built without btrfs support\n"); + exit(1); +#endif + break; default: fprintf(stderr, "Try `fdupes --help' for more information.\n"); @@ -1106,10 +1247,12 @@ int main(int argc, char **argv) { exit(1); } - if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) { - errormsg("options --summarize and --delete are not compatible\n"); - exit(1); - } + if (!!ISFLAG(flags, F_SUMMARIZEMATCHES) + + !!ISFLAG(flags, F_DELETEFILES) + + !!ISFLAG(flags, F_DEDUPEFILES) > 1) { + errormsg("Only one of --summarize, --delete or --dedupe may be passed\n"); + exit(1); + } if (ISFLAG(flags, F_RECURSEAFTER)) { firstrecurse = nonoptafter("--recurse:", argc, oldargv, argv, optind); @@ -1204,15 +1347,14 @@ int main(int argc, char **argv) { deletefiles(files, 1, stdin); } } - +#ifdef HAVE_BTRFS_IOCTL_H + else if (ISFLAG(flags, F_DEDUPEFILES)) + dedupefiles(files); +#endif + else if (ISFLAG(flags, F_SUMMARIZEMATCHES)) + summarizematches(files); else - - if (ISFLAG(flags, F_SUMMARIZEMATCHES)) - summarizematches(files); - - else - - printmatches(files); + printmatches(files); while (files) { curfile = files->next;