Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Maximum and minimum file size #534

Open
wants to merge 6 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Ack.pm
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,10 @@ File inclusion/exclusion:
filetype.
--type=noX Exclude X files.
See "ack --help-types" for supported filetypes.
--max-file-size=NUM, --max-size=NUM
Excludes files larger than this size (in bytes)
--min-file-size=NUM, --min-size=NUM
Excludes files smaller than this size (in bytes)

File type specification:
--type-set TYPE:FILTER:FILTERARGS
Expand Down
4 changes: 4 additions & 0 deletions ConfigLoader.pm
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,10 @@ EOT
=> \$opt->{L},
'm|max-count=i' => \$opt->{m},
'match=s' => \$opt->{regex},
'max-size|max-file-size=s'
=> \$opt->{max_file_size},
'min-size|min-file-size=s'
=> \$opt->{min_file_size},
'n|no-recurse' => \$opt->{n},
o => sub { $opt->{output} = '$&' },
'output=s' => \$opt->{output},
Expand Down
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ t/ack-print0.t
t/ack-removed-options.t
t/ack-show-types.t
t/ack-s.t
t/ack-size.t
t/ack-type-del.t
t/ack-type.t
t/ack-v.t
Expand Down
52 changes: 52 additions & 0 deletions ack
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,23 @@ MAIN: {
main();
}

# Convert a human-readable size specification into a whole number of bytes.
#
# Accepted input is a non-negative decimal number (a fractional part is
# allowed), optionally followed by a magnitude prefix K, M, G or T in either
# case, optionally followed by "B" (decimal units, powers of 1000) or "iB"
# (binary units, powers of 1024).  A prefix on its own is decimal.  Leading
# and trailing whitespace is ignored, and any fraction of a byte left after
# scaling is truncated.
#
# Returns 0 for any false argument (undef, '' or 0).
# Croaks, naming the offending value, when the argument cannot be parsed.
sub _parse_size {
    my ( $spec ) = @_;

    return 0 unless $spec;

    # A list assignment in boolean context yields the number of captures
    # returned, so a failed match (empty list) falls through to croak.
    my ( $bytes, $prefix, $binary )
        = $spec =~ m/^\s*(\d+(?:\.\d+)?)(?:\s*([KMGT]?)(?:(i?)B)?)?\s*$/i
        or Carp::croak( "Invalid size '$spec'" );

    if ( $prefix ) {
        my $base = $binary ? 1024 : 1000;    # 1KiB = 1024B; 1KB = 1000B
        my %exponent_of = ( k => 1, m => 2, g => 3, t => 4 );

        # Multiply one step at a time rather than by $base ** N so that
        # fractional inputs round exactly as repeated scaling would.
        $bytes *= $base for 1 .. $exponent_of{ lc $prefix };
    }

    return int $bytes;
}

sub _compile_descend_filter {
my ( $opt ) = @_;

Expand Down Expand Up @@ -146,6 +163,19 @@ sub _compile_file_filter {
}
}

# For the usual case where the user has not set this, it is faster if we can
# reduce to a single boolean test before we even make the method call.
# If both min and max are 0, don't test; accept all files.

my ( $min_file_size, $max_file_size ) = map { _parse_size( $opt->{"${_}_file_size"} ) } qw ( min max );
my $size_filter = ( $min_file_size || $max_file_size )
? sub {
my $size = (-s _) || 0; # paranoid?
return 0 if $max_file_size and $size > $max_file_size;
return $size >= $min_file_size;
}
: 0;

my %is_member_of_starting_set = map { (get_file_id($_) => 1) } @{$start};

my @ignore_dir_filter = @{$opt->{idirs} || []};
Expand Down Expand Up @@ -227,6 +257,13 @@ sub _compile_file_filter {
return 0;
}

# Warning: the size filter uses -s _: don't stat any other files
# or else you will break it.
#
# Also, it assumes we have a file name as named pipes are filtered out
# earlier.
return 0 if $size_filter && ! $size_filter->($resource);

my $match_found = $direct_filters->filter($resource);

# Don't bother invoking inverse filters unless we consider the current resource a match
Expand Down Expand Up @@ -1410,6 +1447,21 @@ Print this manual page.

No descending into subdirectories.

=item B<--max-file-size=I<NUM>>, B<--max-size=I<NUM>>

The maximum size of files C<ack> is willing to search.

This is useful for when you know you have a handful of extremely large files
which you do not need to search, but whose distinguishing feature is their size.

If not set, or set to 0, then there is no maximum.

=item B<--min-file-size=I<NUM>>, B<--min-size=I<NUM>>

The minimum size of files C<ack> is willing to search.

If not set, or set to 0, then there is no minimum.

=item B<-o>

Show only the part of each line matching PATTERN (turns off text
Expand Down
4 changes: 4 additions & 0 deletions t/Util.pm
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,10 @@ sub get_options {
'--man',
'--match',
'--max-count',
'--max-file-size',
'--max-size',
'--min-file-size',
'--min-size',
'--no-filename',
'--no-recurse',
'--nobreak',
Expand Down
40 changes: 40 additions & 0 deletions t/ack-size.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!perl -T

# Exercises the --max-file-size / --min-file-size options by listing (-f)
# the Perl files under t/swamp and counting how many survive each filter.

use strict;
use warnings;

use Test::More tests => 12;
use lib 't';
use Util;

prep_environment();

my ( $stdout, $stderr );

# Sanity check: the unfiltered baseline every later count is measured against.
( $stdout, $stderr ) = run_ack_with_stderr('--perl', '-f', 't/swamp');
is( scalar(@{$stdout}), 11, 'Found initial 11 files' );
is_empty_array( $stderr, 'Nothing in stderr' );

# A limit of 0 means "no limit".
( $stdout, $stderr ) = run_ack_with_stderr('--perl', '--max-file-size=0', '-f', 't/swamp');
is( scalar(@{$stdout}), 11, 'Found initial 11 files (max of 0 has no effect)' );
is_empty_array( $stderr, 'Nothing in stderr' );

# Maximum on its own.
( $stdout, $stderr ) = run_ack_with_stderr('--perl', '--max-file-size=100', '-f', 't/swamp');
is( scalar(@{$stdout}), 3, 'Found 3 files <= 100 bytes large' );
is_empty_array( $stderr, 'Nothing in stderr' );

# Minimum on its own: the complement of the previous check (11 - 3 = 8).
( $stdout, $stderr ) = run_ack_with_stderr('--perl', '--min-file-size=101', '-f', 't/swamp');
is( scalar(@{$stdout}), 8, 'Found 8 files >= 101 bytes large' );
is_empty_array( $stderr, 'Nothing in stderr' );

# Both bounds together select a window.
( $stdout, $stderr ) = run_ack_with_stderr('--perl', '--min-file-size=101', '--max-file-size=150', '-f', 't/swamp');
is( scalar(@{$stdout}), 1, 'Found 1 file where 101 <= size <= 150' );
is_empty_array( $stderr, 'Nothing in stderr' );

# Contradictory bounds match nothing, and do so without complaint.
( $stdout, $stderr ) = run_ack_with_stderr('--perl', '--max-file-size=100', '--min-file-size=101', '-f', 't/swamp');
is( scalar(@{$stdout}), 0, 'Found no files when max and min conflict' );
is_empty_array( $stderr, 'Nothing in stderr' );

# done testing
30 changes: 30 additions & 0 deletions t/config-loader.t
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ my %defaults = (
l => undef,
L => undef,
m => undef,
max_file_size => undef,
min_file_size => undef,
n => undef,
output => undef,
pager => undef,
Expand Down Expand Up @@ -84,6 +86,34 @@ test_loader(
'--before-context should set before_context'
);

# Every spelling of the two size options must store its value in the
# corresponding *_file_size slot and leave all other options at their defaults.
for my $case (
    [ '--max-size'      => 'max_file_size' ],
    [ '--max-file-size' => 'max_file_size' ],
    [ '--min-size'      => 'min_file_size' ],
    [ '--min-file-size' => 'min_file_size' ],
) {
    my ( $switch, $slot ) = @{$case};

    test_loader(
        argv             => ["$switch=1500"],
        expected_opts    => { %defaults, $slot => 1500 },
        expected_targets => [],
        "$switch should set $slot"
    );
}

# XXX These tests should all be replicated to work off of the ack command line
# tools instead of its internal APIs!
do {
Expand Down