Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fuzzy search extended with PG fuzzystrmatch and pg_trgm extensions. #22

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions FS/FS/Conf.pm
Original file line number Diff line number Diff line change
Expand Up @@ -3882,6 +3882,13 @@ and customer address. Include units.',
'type' => 'checkbox',
},

{
'key' => 'fuzzy-method',
'section' => 'UI',
'description' => 'What underlying strategy should be used for fuzzy searches? Defaults to "String::Approx".',
'type' => 'select',
'select_enum' => ['String::Approx', 'PG levenschtein', 'pg_trgm'],
},
{
'key' => 'fuzzy-fuzziness',
'section' => 'UI',
Expand Down
2 changes: 2 additions & 0 deletions FS/FS/cust_main.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1653,6 +1653,8 @@ use FS::cust_main::Search;
sub queue_fuzzyfiles_update {
my $self = shift;

return unless ($conf->config('fuzzy-method') eq 'String::Approx');

local $SIG{HUP} = 'IGNORE';
local $SIG{INT} = 'IGNORE';
local $SIG{QUIT} = 'IGNORE';
Expand Down
133 changes: 92 additions & 41 deletions FS/FS/cust_main/Search.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1087,63 +1087,114 @@ sub fuzzy_search {
my $self = shift;
my $fuzzy = shift;
# sensible defaults, then merge in any passed options

my %fuzopts = (
'table' => 'cust_main',
'addl_from' => '',
'extra_sql' => '',
'hashref' => {},
'table' => 'cust_main',
'addl_from' => '',
'extra_sql' => '',
'order_by' => undef,
'extra_param' => [],
'hashref' => {},
@_
);

my @cust_main = ();

my @fuzzy_mod = 'i';
my $conf = new FS::Conf;
my $fuzziness = $conf->config('fuzzy-fuzziness');
push @fuzzy_mod, $fuzziness if $fuzziness;

check_and_rebuild_fuzzyfiles();
foreach my $field ( keys %$fuzzy ) {

my $all = $self->all_X($field);
next unless scalar(@$all);
# PG levenshtein matching (selected by the fuzzy-method config value 'PG levenschtein')
if ($conf->config('fuzzy-method') eq 'PG levenschtein') {
foreach my $field ( keys %$fuzzy ) {
my $joins = {};
if ( $field =~ /^cust_location/ and !$joins->{'cust_location'}) {
$fuzopts{'addl_from'} .= ' JOIN cust_location USING (custnum) ';
$joins->{'cust_location'} = 1;
}
elsif ( $field =~ /^contact/ and !$joins->{'contact'} ) {
$fuzopts{'addl_from'} .= ' JOIN contact USING (custnum) ';
$joins->{'contact'} = 1;
}

my %match = ();
$match{$_}=1 foreach ( amatch( $fuzzy->{$field}, \@fuzzy_mod, @$all ) );
next if !keys(%match);
$fuzopts{'extra_sql'} .= ' AND ' if length($fuzopts{'extra_sql'});
$fuzopts{'extra_sql'} .= " levenshtein(lower($field), lower(?)) < $fuzziness ";
push @{$fuzopts{'extra_param'}}, $fuzzy->{$field};
}

my $in_matches = 'IN (' .
join(',', map { dbh->quote($_) } keys %match) .
')';
return qsearch({
%fuzopts,
debug => 1,
});

my $extra_sql = $fuzopts{extra_sql};
if ($extra_sql =~ /^\s*where /i or keys %{ $fuzopts{hashref} }) {
$extra_sql .= ' AND ';
} else {
$extra_sql .= 'WHERE ';
}
$extra_sql .= "$field $in_matches";
} # pg_trgm
elsif ($conf->config('fuzzy-method') eq 'pg_trgm') {

if ($fuzziness) {
dbh->do("SELECT set_limit(?)", {}, $fuzziness);
}
my $joins = {};
foreach my $field ( keys %$fuzzy ) {
if ( $field =~ /^cust_location/ and !$joins->{'cust_location'}) {
$fuzopts{'addl_from'} .= ' JOIN cust_location USING (custnum) ';
$joins->{'cust_location'} = 1;
}
elsif ( $field =~ /^contact/ and !$joins->{'contact'} ) {
$fuzopts{'addl_from'} .= ' JOIN contact USING (custnum) ';
$joins->{'contact'} = 1;
}

my $addl_from = $fuzopts{addl_from};
if ( $field =~ /^cust_location\./ ) {
$addl_from .= ' JOIN cust_location USING (custnum)';
} elsif ( $field =~ /^contact\./ ) {
$addl_from .= ' JOIN contact USING (custnum)';
$fuzopts{'extra_sql'} .= " AND $field % ? ";
push @{$fuzopts{'extra_param'}}, $fuzzy->{$field};
}

push @cust_main, qsearch({
return qsearch({
%fuzopts,
'addl_from' => $addl_from,
'extra_sql' => $extra_sql,
});
}
} # The old String::Approx method
else {
my @cust_main = ();

# we want the components of $fuzzy ANDed, not ORed, but still don't want dupes
my %saw = ();
@cust_main = grep { ++$saw{$_->custnum} == scalar(keys %$fuzzy) } @cust_main;
my @fuzzy_mod = 'i';
push @fuzzy_mod, $fuzziness if $fuzziness;

@cust_main;
check_and_rebuild_fuzzyfiles();
foreach my $field ( keys %$fuzzy ) {

my $all = $self->all_X($field);
next unless scalar(@$all);

my %match = ();
$match{$_}=1 foreach ( amatch( $fuzzy->{$field}, \@fuzzy_mod, @$all ) );
next if !keys(%match);

my $in_matches = 'IN (' .
join(',', map { dbh->quote($_) } keys %match) .
')';

my $extra_sql = $fuzopts{extra_sql};
if ($extra_sql =~ /^\s*where /i or keys %{ $fuzopts{hashref} }) {
$extra_sql .= ' AND ';
} else {
$extra_sql .= 'WHERE ';
}
$extra_sql .= "$field $in_matches";

my $addl_from = $fuzopts{addl_from};
if ( $field =~ /^cust_location\./ ) {
$addl_from .= ' JOIN cust_location USING (custnum)';
} elsif ( $field =~ /^contact\./ ) {
$addl_from .= ' JOIN contact USING (custnum)';
}

push @cust_main, qsearch({
%fuzopts,
'addl_from' => $addl_from,
'extra_sql' => $extra_sql,
});
}

# we want the components of $fuzzy ANDed, not ORed, but still don't want dupes
my %saw = ();
@cust_main = grep { ++$saw{$_->custnum} == scalar(keys %$fuzzy) } @cust_main;

return @cust_main;
}
}

=back
Expand Down