Skip to content

Commit

Permalink
Add support for btrfs deduplication
Browse files Browse the repository at this point in the history
This adds an option -D (or --dedupe) to fdupes which issues the
BTRFS_IOC_FILE_EXTENT_SAME ioctl on files that it detects as duplicates,
causing btrfs to deduplicate the data on disk.

This build option is off by default so that builds on systems without
this header do not break; enable it in the Makefile by setting the
HAVE_BTRFS_IOCTL_H preprocessor macro.

Also fixes a memory leak in escapefilename().
  • Loading branch information
yath committed Dec 21, 2015
1 parent 03abad9 commit 9b30ea1
Show file tree
Hide file tree
Showing 3 changed files with 186 additions and 34 deletions.
7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ FILEOFFSET_64BIT = -D_FILE_OFFSET_BITS=64
#
#OMIT_GETOPT_LONG = -DOMIT_GETOPT_LONG

#
# Whether we have btrfs/ioctl.h. Needed for --dedupe.
#
# HAVE_BTRFS_IOCTL_H = -DHAVE_BTRFS_IOCTL_H

#####################################################################
# Developer Configuration Section #
#####################################################################
Expand Down Expand Up @@ -73,7 +78,7 @@ MKDIR = mkdir -p
CC ?= gcc
COMPILER_OPTIONS = -Wall -O -g

CFLAGS= $(COMPILER_OPTIONS) -I. -DVERSION=\"$(VERSION)\" $(OMIT_GETOPT_LONG) $(FILEOFFSET_64BIT)
CFLAGS= $(COMPILER_OPTIONS) -I. -DVERSION=\"$(VERSION)\" $(OMIT_GETOPT_LONG) $(FILEOFFSET_64BIT) $(HAVE_BTRFS_IOCTL_H)

INSTALL_PROGRAM = $(INSTALL) -c -m 0755
INSTALL_DATA = $(INSTALL) -c -m 0644
Expand Down
5 changes: 5 additions & 0 deletions fdupes.1
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ prompt user for files to preserve, deleting all others (see
.B CAVEATS
below)
.TP
.B -D --dedupe
issue the btrfs same-extents ioctl to trigger a deduplication on
disk. fdupes needs to be built with btrfs support for this option
to be available
.TP
.B -N --noprompt
when used together with \-\-delete, preserve the first file in each
set of duplicates and delete the others without prompting the user
Expand Down
208 changes: 175 additions & 33 deletions fdupes.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@
#include <errno.h>
#include <libgen.h>

#ifdef HAVE_BTRFS_IOCTL_H
#include <sys/types.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <btrfs/ioctl.h>
#endif

#include "md5/md5.h"

#define ISFLAG(a,b) ((a & b) == b)
Expand All @@ -53,6 +60,7 @@
#define F_EXCLUDEHIDDEN 0x1000
#define F_PERMISSIONS 0x2000
#define F_REVERSE 0x4000
#define F_DEDUPEFILES 0x8000

typedef enum {
ORDER_TIME = 0,
Expand Down Expand Up @@ -146,6 +154,7 @@ void escapefilename(char *escape_list, char **filename_ptr)
}
strcpy(*filename_ptr, tmp);
}
free(tmp);
}

off_t filesize(char *filename) {
Expand Down Expand Up @@ -713,42 +722,159 @@ int relink(char *oldfile, char *newfile)
return 1;
}

/*
 * Scan the file list and report the size of the largest duplicate set.
 *
 * files   - head of the list, walked via ->next.
 * max     - out: largest set size found, counting the set's head entry plus
 *           every entry on its ->duplicates chain; 0 when no entry has
 *           hasdupes set.
 * n_files - optional out (may be NULL): number of entries that have
 *           hasdupes set and a non-zero size.
 */
void get_max_dupes(file_t *files, int *max, int *n_files) {
  file_t *head;
  file_t *member;
  int set_size;
  int largest = 0;
  int nonempty_sets = 0;

  for (head = files; head != NULL; head = head->next) {
    if (!head->hasdupes)
      continue;

    if (head->size)
      nonempty_sets++;

    /* The head entry itself counts as the first member of its set. */
    set_size = 1;
    member = head->duplicates;
    while (member != NULL) {
      set_size++;
      member = member->duplicates;
    }

    if (set_size > largest)
      largest = set_size;
  }

  *max = largest;
  if (n_files)
    *n_files = nonempty_sets;
}

#ifdef HAVE_BTRFS_IOCTL_H
/*
 * Translate a per-file status code of the same-extent ioctl into a
 * printable message.
 *
 * err - status value to describe.
 *
 * Returns a string literal for BTRFS_SAME_DATA_DIFFERS, the strerror() text
 * of -err when err is negative, and otherwise a pointer to a static buffer
 * holding "Unknown error <err>". The result must not be freed; the static
 * buffer is overwritten by the next call that reaches that branch.
 */
static char *dedupeerrstr(int err) {
  static char unknown[1024];

  if (err == BTRFS_SAME_DATA_DIFFERS)
    return "BTRFS_SAME_DATA_DIFFERS (data modified in the meantime?)";

  if (err < 0)
    return strerror(-err);

  snprintf(unknown, sizeof(unknown), "Unknown error %d", err);
  unknown[sizeof(unknown) - 1] = '\0';
  return unknown;
}

/*
 * Ask btrfs to share extents between the files of every duplicate set.
 *
 * For each list entry that has duplicates and a non-zero size, the entry
 * itself is the source of one BTRFS_IOC_FILE_EXTENT_SAME request and every
 * file on its ->duplicates chain that can be opened is a destination.
 * Failures are reported through errormsg() and processing continues with the
 * next file or set; only an allocation failure terminates the program.
 *
 * files - head of the file list, walked via ->next.
 *
 * Unless F_HIDEPROGRESS is set, a progress indicator is written to stderr
 * and blanked again before returning.
 *
 * NOTE(review): only info[].status is inspected; info[].bytes_deduped is
 * never compared against the requested length, so a partially deduplicated
 * file would go unnoticed -- confirm whether the kernel can shorten the
 * request for large files.
 */
void dedupefiles(file_t *files)
{
  struct btrfs_ioctl_same_args *same;
  char **dupe_filenames; /* maps to same->info indices */

  file_t *curfile;
  int n_dupes, max_dupes, cur_info;
  int cur_file = 0, max_files;
  size_t n_slots;

  int fd;
  int ret, status;
  int ioctl_errno;

  get_max_dupes(files, &max_dupes, &max_files);

  /* Always request at least one slot: malloc(0) may return NULL, which
     would be misreported as an allocation failure when there are no sets. */
  n_slots = max_dupes > 0 ? (size_t)max_dupes : 1;

  same = calloc(1, sizeof(struct btrfs_ioctl_same_args) +
                   sizeof(struct btrfs_ioctl_same_extent_info) * n_slots);
  dupe_filenames = malloc(n_slots * sizeof(char *));
  if (!same || !dupe_filenames) {
    errormsg("out of memory\n");
    exit(1);
  }

  while (files) {
    if (files->hasdupes && files->size) {
      cur_file++;
      if (!ISFLAG(flags, F_HIDEPROGRESS)) {
        fprintf(stderr, "\rDedupe [%d/%d] %d%% ", cur_file, max_files,
                cur_file*100 / max_files);
      }

      /* Open every duplicate; a file that cannot be opened is reported and
         its slot is reused by the next one, so info[] stays dense. */
      cur_info = 0;
      for (curfile = files->duplicates; curfile; curfile = curfile->duplicates) {
        dupe_filenames[cur_info] = curfile->d_name;
        fd = open(curfile->d_name, O_RDONLY);
        if (fd == -1) {
          errormsg("Unable to open(\"%s\", O_RDONLY): %s\n",
                   curfile->d_name, strerror(errno));
          continue;
        }

        same->info[cur_info].fd = fd;
        same->info[cur_info].logical_offset = 0;
        cur_info++;
      }
      n_dupes = cur_info;

      /* No destination could be opened: there is nothing to request. */
      if (n_dupes == 0)
        goto cleanup;

      same->logical_offset = 0;
      same->length = files->size;
      same->dest_count = n_dupes;

      fd = open(files->d_name, O_RDONLY);
      if (fd == -1) {
        errormsg("Unable to open(\"%s\", O_RDONLY): %s\n", files->d_name,
                 strerror(errno));
        goto cleanup;
      }

      ret = ioctl(fd, BTRFS_IOC_FILE_EXTENT_SAME, same);
      /* Save errno now: close() and errormsg() below may overwrite it. */
      ioctl_errno = errno;
      if (close(fd) == -1)
        errormsg("Unable to close(\"%s\"): %s\n", files->d_name, strerror(errno));

      if (ret == -1) {
        errormsg("ioctl(\"%s\", BTRFS_IOC_FILE_EXTENT_SAME, [%d files]): %s\n",
                 files->d_name, n_dupes, strerror(ioctl_errno));
        goto cleanup;
      }

      /* The request as a whole succeeded; report per-destination failures. */
      for (cur_info = 0; cur_info < n_dupes; cur_info++) {
        if ((status = same->info[cur_info].status) != 0) {
          errormsg("Couldn't dedupe %s => %s: %s\n", files->d_name,
                   dupe_filenames[cur_info], dedupeerrstr(status));
        }
      }

cleanup:
      /* Close every destination that was successfully opened for this set. */
      for (cur_info = 0; cur_info < n_dupes; cur_info++) {
        if (close(same->info[cur_info].fd) == -1) {
          errormsg("Unable to close(\"%s\"): %s\n", dupe_filenames[cur_info],
                   strerror(errno));
        }
      }

    } /* has dupes */

    files = files->next;
  }

  /* Blank the progress line. */
  if (!ISFLAG(flags, F_HIDEPROGRESS))
    fprintf(stderr, "\r%40s\r", " ");
  free(same);
  free(dupe_filenames);
}
#endif

void deletefiles(file_t *files, int prompt, FILE *tty)
{
int counter;
int groups = 0;
int curgroup = 0;
file_t *tmpfile;
file_t *curfile;
file_t **dupelist;
int *preserve;
char *preservestr;
char *token;
char *tstr;
int number;
int sum;
int max = 0;
int max;
int x;
int i;

curfile = files;

while (curfile) {
if (curfile->hasdupes) {
counter = 1;
groups++;

tmpfile = curfile->duplicates;
while (tmpfile) {
counter++;
tmpfile = tmpfile->duplicates;
}

if (counter > max) max = counter;
}

curfile = curfile->next;
}
get_max_dupes(files, &max, NULL);

max++;

Expand Down Expand Up @@ -955,6 +1081,12 @@ void help_text()
printf(" \twith -s or --symlinks, or when specifying a\n");
printf(" \tparticular directory more than once; refer to the\n");
printf(" \tfdupes documentation for additional information\n");
#ifdef HAVE_BTRFS_IOCTL_H
printf(" -D --dedupe \tCall the btrfs same-extent ioctl to deduplicate\n");
printf(" \tdata on disk\n");
#else
printf(" -D --dedupe \tNot available in this build of fdupes\n");
#endif
/*printf(" -l --relink \t(description)\n");*/
printf(" -N --noprompt \ttogether with --delete, preserve the first file in\n");
printf(" \teach set of duplicates and delete the rest without\n");
Expand Down Expand Up @@ -1011,6 +1143,7 @@ int main(int argc, char **argv) {
{ "permissions", 0, 0, 'p' },
{ "order", 1, 0, 'o' },
{ "reverse", 0, 0, 'i' },
{ "dedupe", 0, 0, 'D' },
{ 0, 0, 0, 0 }
};
#define GETOPT getopt_long
Expand All @@ -1022,7 +1155,7 @@ int main(int argc, char **argv) {

oldargv = cloneargs(argc, argv);

while ((opt = GETOPT(argc, argv, "frRq1SsHlnAdvhNmpo:i"
while ((opt = GETOPT(argc, argv, "frRq1SsHlnAdvhNmpo:iD"
#ifndef OMIT_GETOPT_LONG
, long_options, NULL
#endif
Expand Down Expand Up @@ -1089,6 +1222,14 @@ int main(int argc, char **argv) {
case 'i':
SETFLAG(flags, F_REVERSE);
break;
case 'D':
#ifdef HAVE_BTRFS_IOCTL_H
SETFLAG(flags, F_DEDUPEFILES);
#else
errormsg("This version of fdupes was built without btrfs support\n");
exit(1);
#endif
break;

default:
fprintf(stderr, "Try `fdupes --help' for more information.\n");
Expand All @@ -1106,10 +1247,12 @@ int main(int argc, char **argv) {
exit(1);
}

if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) {
errormsg("options --summarize and --delete are not compatible\n");
exit(1);
}
if (!!ISFLAG(flags, F_SUMMARIZEMATCHES) +
!!ISFLAG(flags, F_DELETEFILES) +
!!ISFLAG(flags, F_DEDUPEFILES) > 1) {
errormsg("Only one of --summarize, --delete or --dedupe may be passed\n");
exit(1);
}

if (ISFLAG(flags, F_RECURSEAFTER)) {
firstrecurse = nonoptafter("--recurse:", argc, oldargv, argv, optind);
Expand Down Expand Up @@ -1204,15 +1347,14 @@ int main(int argc, char **argv) {
deletefiles(files, 1, stdin);
}
}

#ifdef HAVE_BTRFS_IOCTL_H
else if (ISFLAG(flags, F_DEDUPEFILES))
dedupefiles(files);
#endif
else if (ISFLAG(flags, F_SUMMARIZEMATCHES))
summarizematches(files);
else

if (ISFLAG(flags, F_SUMMARIZEMATCHES))
summarizematches(files);

else

printmatches(files);
printmatches(files);

while (files) {
curfile = files->next;
Expand Down

0 comments on commit 9b30ea1

Please sign in to comment.