diff --git a/Ack.pm b/Ack.pm index 0fc88bd8..92006756 100644 --- a/Ack.pm +++ b/Ack.pm @@ -346,6 +346,10 @@ File inclusion/exclusion: filetype. --type=noX Exclude X files. See "ack --help-types" for supported filetypes. + --max-file-size=NUM, --max-size=NUM + Excludes files larger than this size (in bytes) + --min-file-size=NUM, --min-size=NUM + Excludes files smaller than this size (in bytes) File type specification: --type-set TYPE:FILTER:FILTERARGS diff --git a/ConfigLoader.pm b/ConfigLoader.pm index 2169e193..8166fbee 100644 --- a/ConfigLoader.pm +++ b/ConfigLoader.pm @@ -332,6 +332,10 @@ EOT => \$opt->{L}, 'm|max-count=i' => \$opt->{m}, 'match=s' => \$opt->{regex}, + 'max-size|max-file-size=s' + => \$opt->{max_file_size}, + 'min-size|min-file-size=s' + => \$opt->{min_file_size}, 'n|no-recurse' => \$opt->{n}, o => sub { $opt->{output} = '$&' }, 'output=s' => \$opt->{output}, diff --git a/MANIFEST b/MANIFEST index e5da5fc5..d79bac32 100644 --- a/MANIFEST +++ b/MANIFEST @@ -69,6 +69,7 @@ t/ack-print0.t t/ack-removed-options.t t/ack-show-types.t t/ack-s.t +t/ack-size.t t/ack-type-del.t t/ack-type.t t/ack-v.t diff --git a/ack b/ack index 467078eb..8665cd0d 100644 --- a/ack +++ b/ack @@ -98,6 +98,26 @@ MAIN: { main(); } +# Parse a size such as "1500", "10K", "1.5MB" or "2GiB" into a whole number of bytes. +# K/M/G/T (optionally followed by B) are powers of 1000; with "iB" they are powers of 1024. Case-insensitive. +# Returns 0 for an unset, empty or zero argument; fractions are truncated; croaks on anything unparseable. +sub _parse_size { + my $s = $_[0] || return 0; + + if ( $s =~ m/^\s*(\d+(?:\.\d+)?)(?:\s*([KMGT]?)(?:(i?)B)?)?\s*$/i ) { + my $n = $1; + if ($2) { + my $u = lc $2; + my $i = $3 ? 
1024 : 1000; # 1KiB = 1024B; 1KB = 1000B + $n *= $i while $u =~ tr/tgmk/gmk/d; + } + return int $n; + } + else { + Carp::croak('Invalid size'); + } +} + sub _compile_descend_filter { my ( $opt ) = @_; @@ -146,6 +166,19 @@ sub _compile_file_filter { } } + # For the usual case where the user has not set this, it is faster if we can + # reduce to a single boolean test before we even make the method call. + # If both min and max are 0, don't test; accept all files. + + my ( $min_file_size, $max_file_size ) = map { _parse_size( $opt->{"${_}_file_size"} ) } qw ( min max ); + my $size_filter = ( $min_file_size || $max_file_size ) + ? sub { + my $size = (-s _) || 0; # paranoid? + return 0 if $max_file_size and $size > $max_file_size; + return $size >= $min_file_size; + } + : 0; + my %is_member_of_starting_set = map { (get_file_id($_) => 1) } @{$start}; my @ignore_dir_filter = @{$opt->{idirs} || []}; @@ -227,6 +260,13 @@ sub _compile_file_filter { return 0; } + # Warning: the size filter uses -s _: don't stat any other files + # or else you will break it. + # + # Also, it assumes we have a file name as named pipes are filtered out + # earlier. + return 0 if $size_filter && ! $size_filter->($resource); + my $match_found = $direct_filters->filter($resource); # Don't bother invoking inverse filters unless we consider the current resource a match @@ -1410,6 +1450,21 @@ Print this manual page. No descending into subdirectories. +=item B<--max-file-size=I<NUM>>, B<--max-size=I<NUM>> + +The maximum size of files C<ack> is willing to search. + +This is useful for when you know you have a handful of extremely large files +which you do not need to search, but whose distinguishing feature is their size. + +If not set, or set to 0, then there is no maximum. + +=item B<--min-file-size=I<NUM>>, B<--min-size=I<NUM>> + +The minimum size of files C<ack> is willing to search. + +If not set, or set to 0, then there is no minimum. 
+ + =item B<-o> Show only the part of each line matching PATTERN (turns off text diff --git a/t/Util.pm b/t/Util.pm index a14acd61..78bc41f6 100644 --- a/t/Util.pm +++ b/t/Util.pm @@ -679,6 +679,10 @@ sub get_options { '--man', '--match', '--max-count', + '--max-file-size', + '--max-size', + '--min-file-size', + '--min-size', '--no-filename', '--no-recurse', '--nobreak', diff --git a/t/ack-size.t b/t/ack-size.t new file mode 100644 index 00000000..9850b375 --- /dev/null +++ b/t/ack-size.t @@ -0,0 +1,40 @@ +#!perl -T + +use strict; +use warnings; + +use Test::More tests => 12; +use lib 't'; +use Util; + +prep_environment(); + +my ( $stdout, $stderr ); +my $help_types_output; + +# sanity check +( $stdout, $stderr ) = run_ack_with_stderr('--perl', '-f', 't/swamp'); +is( scalar(@{$stdout}), 11, 'Found initial 11 files' ); +is_empty_array( $stderr, 'Nothing in stderr' ); + +( $stdout, $stderr ) = run_ack_with_stderr('--perl', '--max-file-size=0', '-f', 't/swamp'); +is( scalar(@{$stdout}), 11, 'Found initial 11 files (max of 0 has no effect)' ); +is_empty_array( $stderr, 'Nothing in stderr' ); + +( $stdout, $stderr ) = run_ack_with_stderr('--perl', '--max-file-size=100', '-f', 't/swamp'); +is( scalar(@{$stdout}), 3, 'Found 3 files <= 100 bytes large' ); +is_empty_array( $stderr, 'Nothing in stderr' ); + +( $stdout, $stderr ) = run_ack_with_stderr('--perl', '--min-file-size=101', '-f', 't/swamp'); +is( scalar(@{$stdout}), 8, 'Found 8 files >= 101 bytes large' ); +is_empty_array( $stderr, 'Nothing in stderr' ); + +( $stdout, $stderr ) = run_ack_with_stderr('--perl', '--min-file-size=101', '--max-file-size=150', '-f', 't/swamp'); +is( scalar(@{$stdout}), 1, 'Found 1 file where 101 <= size <= 150' ); +is_empty_array( $stderr, 'Nothing in 
stderr' ); + +# done testing diff --git a/t/config-loader.t b/t/config-loader.t index 030a10e5..b75aeec0 100644 --- a/t/config-loader.t +++ b/t/config-loader.t @@ -36,6 +36,8 @@ my %defaults = ( l => undef, L => undef, m => undef, + max_file_size => undef, + min_file_size => undef, n => undef, output => undef, pager => undef, @@ -84,6 +86,34 @@ test_loader( '--before-context should set before_context' ); +test_loader( + argv => ['--max-size=1500'], + expected_opts => { %defaults, max_file_size => 1500 }, + expected_targets => [], + '--max-size should set max_file_size' +); + +test_loader( + argv => ['--max-file-size=1500'], + expected_opts => { %defaults, max_file_size => 1500 }, + expected_targets => [], + '--max-file-size should set max_file_size' +); + +test_loader( + argv => ['--min-size=1500'], + expected_opts => { %defaults, min_file_size => 1500 }, + expected_targets => [], + '--min-size should set min_file_size' +); + +test_loader( + argv => ['--min-file-size=1500'], + expected_opts => { %defaults, min_file_size => 1500 }, + expected_targets => [], + '--min-file-size should set min_file_size' +); + # XXX These tests should all be replicated to work off of the ack command line # tools instead of its internal APIs! do {