Add -D/--dedupe to fdupes: deduplicate matching files through the btrfs
same-extent ioctl.

Touches Makefile (optional HAVE_BTRFS_IOCTL_H define), fdupes.1 (option
text) and fdupes.c (get_max_dupes(), dedupeerrstr(), dedupefiles(), option
parsing and dispatch in main()).

Review changes folded into this revision:
  * deletefiles(): recount `groups` after the switch to get_max_dupes();
    the loop that used to increment it is removed by this patch and the
    helper is called with a NULL counter, so `groups` stayed at 0.
  * dedupefiles(): end the "Unable to close" message in the cleanup loop
    with "\n", like every other errormsg() call in the patch.
  * dedupeerrstr(): return const char *, since it can return a string
    literal.
  * Short comments on the three new functions; hunk line counts and
    new-side start lines adjusted for the added lines.

NOTE(review), not addressed in this revision:
  * dedupefiles() reads errno for the ioctl() failure message only after
    close() has run; saving errno right after ioctl() would be safer.
  * dedupefiles() requests the whole file in one call and checks only
    info[].status; presumably the kernel can dedupe less than requested
    (cf. info[].bytes_deduped) -- confirm against the btrfs ioctl docs.
  * The operands of the "#include" lines are missing from this copy of
    the patch and must be restored before it can be applied.
  * The "index" lines still carry the blob ids of the original submission.

diff --git a/Makefile b/Makefile index ed5498e..cbe2f38 100644 --- a/Makefile +++ b/Makefile @@ -25,6 +25,11 @@ FILEOFFSET_64BIT = -D_FILE_OFFSET_BITS=64 # #OMIT_GETOPT_LONG = -DOMIT_GETOPT_LONG +# +# Whether we have btrfs/ioctl.h. Needed for --dedupe. +# +# HAVE_BTRFS_IOCTL_H = -DHAVE_BTRFS_IOCTL_H + ##################################################################### # Developer Configuration Section # ##################################################################### @@ -73,7 +78,7 @@ MKDIR = mkdir -p CC ?= gcc COMPILER_OPTIONS = -Wall -O -g -CFLAGS= $(COMPILER_OPTIONS) -I. -DVERSION=\"$(VERSION)\" $(OMIT_GETOPT_LONG) $(FILEOFFSET_64BIT) +CFLAGS= $(COMPILER_OPTIONS) -I. -DVERSION=\"$(VERSION)\" $(OMIT_GETOPT_LONG) $(FILEOFFSET_64BIT) $(HAVE_BTRFS_IOCTL_H) INSTALL_PROGRAM = $(INSTALL) -c -m 0755 INSTALL_DATA = $(INSTALL) -c -m 0644 diff --git a/fdupes.1 b/fdupes.1 index bdad958..76c64c1 100644 --- a/fdupes.1 +++ b/fdupes.1 @@ -59,6 +59,11 @@ prompt user for files to preserve, deleting all others (see .B CAVEATS below) .TP +.B -D --dedupe +issue the btrfs same-extents ioctl to trigger a deduplication on +disk.
fdupes needs to be built with btrfs support for this option +to be available +.TP .B -N --noprompt when used together with \-\-delete, preserve the first file in each set of duplicates and delete the others without prompting the user diff --git a/fdupes.c b/fdupes.c index 61f0c55..8838d72 100644 --- a/fdupes.c +++ b/fdupes.c @@ -33,6 +33,13 @@ #include #include +#ifdef HAVE_BTRFS_IOCTL_H +#include +#include +#include +#include +#endif + #include "md5/md5.h" #define ISFLAG(a,b) ((a & b) == b) @@ -53,6 +60,7 @@ #define F_EXCLUDEHIDDEN 0x1000 #define F_PERMISSIONS 0x2000 #define F_REVERSE 0x4000 +#define F_DEDUPEFILES 0x8000 typedef enum { ORDER_TIME = 0, @@ -146,6 +154,7 @@ void escapefilename(char *escape_list, char **filename_ptr) } strcpy(*filename_ptr, tmp); } + free(tmp); } off_t filesize(char *filename) { @@ -713,13 +722,154 @@ int relink(char *oldfile, char *newfile) return 1; } +/* Scan the file list. *max receives the size of the largest duplicate set, + counting the head file plus its ->duplicates chain; *n_files, when non-NULL, + receives the number of sets with duplicates whose head size is non-zero. */ +void get_max_dupes(file_t *files, int *max, int *n_files) { + file_t *curdupe; + int n_dupes; + + *max = 0; + if (n_files) + *n_files = 0; + + while (files) { + if (files->hasdupes) { + if (files->size && n_files) + (*n_files)++; + + n_dupes = 1; + + for (curdupe = files->duplicates; curdupe; curdupe = curdupe->duplicates) + n_dupes++; + + if (n_dupes > *max) + *max = n_dupes; + + } + + files = files->next; + } +} + +#ifdef HAVE_BTRFS_IOCTL_H +/* Map a per-destination status from the same-extent ioctl to text. The + "Unknown error" case returns a static buffer, so the result is not reentrant. */ +static const char *dedupeerrstr(int err) { + static char buf[1024]; + + if (err == BTRFS_SAME_DATA_DIFFERS) { + return "BTRFS_SAME_DATA_DIFFERS (data modified in the meantime?)"; + } else if (err < 0) { + return strerror(-err); + } else { + snprintf(buf, sizeof(buf), "Unknown error %d", err); + buf[sizeof(buf)-1] = '\0'; + return buf; + } +} + +/* For each duplicate set whose head file has a non-zero size, issue + BTRFS_IOC_FILE_EXTENT_SAME on the head file with its duplicates as destinations. */ +void dedupefiles(file_t *files) +{ + struct btrfs_ioctl_same_args *same; + char **dupe_filenames; /* maps to same->info indices */ + + file_t *curfile; + int n_dupes, max_dupes, cur_info; + int cur_file = 0, max_files; + + int fd; + int ret, status; + + get_max_dupes(files, &max_dupes, &max_files); + same =
calloc(sizeof(struct btrfs_ioctl_same_args) + + sizeof(struct btrfs_ioctl_same_extent_info) * max_dupes, 1); + dupe_filenames = malloc(max_dupes * sizeof(char *)); + if (!same || !dupe_filenames) { + errormsg("out of memory\n"); + exit(1); + } + + while (files) { + if (files->hasdupes && files->size) { + cur_file++; + if (!ISFLAG(flags, F_HIDEPROGRESS)) { + fprintf(stderr, "\rDedupe [%d/%d] %d%% ", cur_file, max_files, + cur_file*100 / max_files); + } + + cur_info = 0; + for (curfile = files->duplicates; curfile; curfile = curfile->duplicates) { + dupe_filenames[cur_info] = curfile->d_name; + fd = open(curfile->d_name, O_RDONLY); + if (fd == -1) { + errormsg("Unable to open(\"%s\", O_RDONLY): %s\n", + curfile->d_name, strerror(errno)); + continue; + } + + same->info[cur_info].fd = fd; + same->info[cur_info].logical_offset = 0; + cur_info++; + } + n_dupes = cur_info; + + same->logical_offset = 0; + same->length = files->size; + same->dest_count = n_dupes; + + fd = open(files->d_name, O_RDONLY); + if (fd == -1) { + errormsg("Unable to open(\"%s\", O_RDONLY): %s\n", files->d_name, + strerror(errno)); + goto cleanup; + } + + ret = ioctl(fd, BTRFS_IOC_FILE_EXTENT_SAME, same); + if (close(fd) == -1) + errormsg("Unable to close(\"%s\"): %s\n", files->d_name, strerror(errno)); + + if (ret == -1) { + errormsg("ioctl(\"%s\", BTRFS_IOC_FILE_EXTENT_SAME, [%d files]): %s\n", + files->d_name, n_dupes, strerror(errno)); + goto cleanup; + } + + for (cur_info = 0; cur_info < n_dupes; cur_info++) { + if ((status = same->info[cur_info].status) != 0) { + errormsg("Couldn't dedupe %s => %s: %s\n", files->d_name, + dupe_filenames[cur_info], dedupeerrstr(status)); + } + } + +cleanup: + for (cur_info = 0; cur_info < n_dupes; cur_info++) { + if (close(same->info[cur_info].fd) == -1) { + errormsg("Unable to close(\"%s\"): %s\n", dupe_filenames[cur_info], + strerror(errno)); + } + } + + } /* has dupes */ + + files = files->next; + } + + if (!ISFLAG(flags, F_HIDEPROGRESS)) + fprintf(stderr,
"\r%40s\r", " "); + free(same); + free(dupe_filenames); +} +#endif + void deletefiles(file_t *files, int prompt, FILE *tty) { int counter; int groups = 0; int curgroup = 0; file_t *tmpfile; - file_t *curfile; file_t **dupelist; int *preserve; char *preservestr; @@ -727,28 +877,15 @@ void deletefiles(file_t *files, int prompt, FILE *tty) char *tstr; int number; int sum; - int max = 0; + int max; int x; int i; - curfile = files; - - while (curfile) { - if (curfile->hasdupes) { - counter = 1; - groups++; - - tmpfile = curfile->duplicates; - while (tmpfile) { - counter++; - tmpfile = tmpfile->duplicates; - } - - if (counter > max) max = counter; - } - - curfile = curfile->next; - } + get_max_dupes(files, &max, NULL); + /* get_max_dupes() leaves groups untouched here: recount the duplicate sets, as the removed loop did */ + for (tmpfile = files; tmpfile; tmpfile = tmpfile->next) { + if (tmpfile->hasdupes) groups++; + } max++; @@ -955,6 +1092,12 @@ void help_text() printf(" \twith -s or --symlinks, or when specifying a\n"); printf(" \tparticular directory more than once; refer to the\n"); printf(" \tfdupes documentation for additional information\n"); +#ifdef HAVE_BTRFS_IOCTL_H + printf(" -D --dedupe \tCall the btrfs same-extent ioctl to deduplicate\n"); + printf(" \tdata on disk\n"); +#else + printf(" -D --dedupe \tNot available in this build of fdupes\n"); +#endif /*printf(" -l --relink \t(description)\n");*/ printf(" -N --noprompt \ttogether with --delete, preserve the first file in\n"); printf(" \teach set of duplicates and delete the rest without\n"); @@ -1011,6 +1154,7 @@ int main(int argc, char **argv) { { "permissions", 0, 0, 'p' }, { "order", 1, 0, 'o' }, { "reverse", 0, 0, 'i' }, + { "dedupe", 0, 0, 'D' }, { 0, 0, 0, 0 } }; #define GETOPT getopt_long @@ -1022,7 +1166,7 @@ int main(int argc, char **argv) { oldargv = cloneargs(argc, argv); - while ((opt = GETOPT(argc, argv, "frRq1SsHlnAdvhNmpo:i" + while ((opt = GETOPT(argc, argv, "frRq1SsHlnAdvhNmpo:iD" #ifndef OMIT_GETOPT_LONG , long_options, NULL #endif @@ -1089,6 +1233,14 @@ int main(int argc, char **argv) { case 'i': SETFLAG(flags, F_REVERSE); break; + case 'D': +#ifdef HAVE_BTRFS_IOCTL_H + SETFLAG(flags,
F_DEDUPEFILES); +#else + errormsg("This version of fdupes was built without btrfs support\n"); + exit(1); +#endif + break; default: fprintf(stderr, "Try `fdupes --help' for more information.\n"); @@ -1106,10 +1258,12 @@ int main(int argc, char **argv) { exit(1); } - if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) { - errormsg("options --summarize and --delete are not compatible\n"); - exit(1); - } + if (!!ISFLAG(flags, F_SUMMARIZEMATCHES) + + !!ISFLAG(flags, F_DELETEFILES) + + !!ISFLAG(flags, F_DEDUPEFILES) > 1) { + errormsg("Only one of --summarize, --delete or --dedupe may be passed\n"); + exit(1); + } if (ISFLAG(flags, F_RECURSEAFTER)) { firstrecurse = nonoptafter("--recurse:", argc, oldargv, argv, optind); @@ -1204,15 +1358,14 @@ int main(int argc, char **argv) { deletefiles(files, 1, stdin); } } - +#ifdef HAVE_BTRFS_IOCTL_H + else if (ISFLAG(flags, F_DEDUPEFILES)) + dedupefiles(files); +#endif + else if (ISFLAG(flags, F_SUMMARIZEMATCHES)) + summarizematches(files); else - - if (ISFLAG(flags, F_SUMMARIZEMATCHES)) - summarizematches(files); - - else - - printmatches(files); + printmatches(files); while (files) { curfile = files->next;