From 5f21afb7ac70f6fd92959908592e077da79ff2a9 Mon Sep 17 00:00:00 2001 From: Doran Barton Date: Mon, 9 Dec 2013 12:04:38 -0700 Subject: [PATCH] Fuzzy search extended with PG fuzzystrmatch and pg_trgm extensions. --- FS/FS/Conf.pm | 7 ++ FS/FS/cust_main.pm | 2 + FS/FS/cust_main/Search.pm | 133 ++++++++++++++++++++++++++------------ 3 files changed, 101 insertions(+), 41 deletions(-) diff --git a/FS/FS/Conf.pm b/FS/FS/Conf.pm index 0eed8ee5d3..d148d69aea 100644 --- a/FS/FS/Conf.pm +++ b/FS/FS/Conf.pm @@ -3882,6 +3882,13 @@ and customer address. Include units.', 'type' => 'checkbox', }, + { + 'key' => 'fuzzy-method', + 'section' => 'UI', + 'description' => 'What underlying strategy should be used for fuzzy searches? Defaults to "String::Approx".', + 'type' => 'select', + 'select_enum' => ['String::Approx', 'PG levenschtein', 'pg_trgm'], + }, { 'key' => 'fuzzy-fuzziness', 'section' => 'UI', diff --git a/FS/FS/cust_main.pm b/FS/FS/cust_main.pm index a1d7d87d0b..45f5045c76 100644 --- a/FS/FS/cust_main.pm +++ b/FS/FS/cust_main.pm @@ -1653,6 +1653,8 @@ use FS::cust_main::Search; sub queue_fuzzyfiles_update { my $self = shift; + return unless ($conf->config('fuzzy-method') eq 'String::Approx'); + local $SIG{HUP} = 'IGNORE'; local $SIG{INT} = 'IGNORE'; local $SIG{QUIT} = 'IGNORE'; diff --git a/FS/FS/cust_main/Search.pm b/FS/FS/cust_main/Search.pm index b52b3596a2..b91ccf6629 100644 --- a/FS/FS/cust_main/Search.pm +++ b/FS/FS/cust_main/Search.pm @@ -1087,63 +1087,114 @@ sub fuzzy_search { my $self = shift; my $fuzzy = shift; # sensible defaults, then merge in any passed options + my %fuzopts = ( - 'table' => 'cust_main', - 'addl_from' => '', - 'extra_sql' => '', - 'hashref' => {}, + 'table' => 'cust_main', + 'addl_from' => '', + 'extra_sql' => '', + 'order_by' => undef, + 'extra_param' => [], + 'hashref' => {}, @_ ); - my @cust_main = (); - - my @fuzzy_mod = 'i'; - my $conf = new FS::Conf; - my $fuzziness = $conf->config('fuzzy-fuzziness'); - push @fuzzy_mod, $fuzziness if $fuzziness; - - check_and_rebuild_fuzzyfiles(); - foreach my $field ( keys %$fuzzy ) { - my $all = $self->all_X($field); - next unless scalar(@$all); + # PG levenschtein matching + if ($conf->config('fuzzy-method') eq 'PG levenschtein') { + foreach my $field ( keys %$fuzzy ) { + my $joins = {}; + if ( $field =~ /^cust_location/ and !$joins->{'cust_location'}) { + $fuzopts{'addl_from'} .= ' JOIN cust_location USING (custnum) '; + $joins->{'cust_location'} = 1; + } + elsif ( $field =~ /^contact/ and !$joins->{'contact'} ) { + $fuzopts{'addl_from'} .= ' JOIN contact USING (custnum) '; + $joins->{'contact'} = 1; + } - my %match = (); - $match{$_}=1 foreach ( amatch( $fuzzy->{$field}, \@fuzzy_mod, @$all ) ); - next if !keys(%match); + $fuzopts{'extra_sql'} .= ' AND ' if length($fuzopts{'extra_sql'}); + $fuzopts{'extra_sql'} .= " levenshtein(lower($field), lower(?)) < $fuzziness "; + push @{$fuzopts{'extra_param'}}, $fuzzy->{$field}; + } - my $in_matches = 'IN (' . - join(',', map { dbh->quote($_) } keys %match) . - ')'; + return qsearch({ + %fuzopts, + debug => 1, + }); - my $extra_sql = $fuzopts{extra_sql}; - if ($extra_sql =~ /^\s*where /i or keys %{ $fuzopts{hashref} }) { - $extra_sql .= ' AND '; - } else { - $extra_sql .= 'WHERE '; - } - $extra_sql .= "$field $in_matches"; + } # pg_trgm + elsif ($conf->config('fuzzy-method') eq 'pg_trgm') { + + if ($fuzziness) { + dbh->do("SELECT set_limit(?)", {}, $fuzziness); + } + my $joins = {}; + foreach my $field ( keys %$fuzzy ) { + if ( $field =~ /^cust_location/ and !$joins->{'cust_location'}) { + $fuzopts{'addl_from'} .= ' JOIN cust_location USING (custnum) '; + $joins->{'cust_location'} = 1; + } + elsif ( $field =~ /^contact/ and !$joins->{'contact'} ) { + $fuzopts{'addl_from'} .= ' JOIN contact USING (custnum) '; + $joins->{'contact'} = 1; + } - my $addl_from = $fuzopts{addl_from}; - if ( $field =~ /^cust_location\./ ) { - $addl_from .= ' JOIN cust_location USING (custnum)'; - } elsif ( $field =~ /^contact\./ ) { - $addl_from .= ' JOIN contact USING (custnum)'; + $fuzopts{'extra_sql'} .= " AND $field % ? "; + push @{$fuzopts{'extra_param'}}, $fuzzy->{$field}; } - push @cust_main, qsearch({ + return qsearch({ %fuzopts, - 'addl_from' => $addl_from, - 'extra_sql' => $extra_sql, }); - } + } # The old String::Approx method + else { + my @cust_main = (); - # we want the components of $fuzzy ANDed, not ORed, but still don't want dupes - my %saw = (); - @cust_main = grep { ++$saw{$_->custnum} == scalar(keys %$fuzzy) } @cust_main; + my @fuzzy_mod = 'i'; + push @fuzzy_mod, $fuzziness if $fuzziness; - @cust_main; + check_and_rebuild_fuzzyfiles(); + foreach my $field ( keys %$fuzzy ) { + + my $all = $self->all_X($field); + next unless scalar(@$all); + my %match = (); + $match{$_}=1 foreach ( amatch( $fuzzy->{$field}, \@fuzzy_mod, @$all ) ); + next if !keys(%match); + + my $in_matches = 'IN (' . + join(',', map { dbh->quote($_) } keys %match) . + ')'; + + my $extra_sql = $fuzopts{extra_sql}; + if ($extra_sql =~ /^\s*where /i or keys %{ $fuzopts{hashref} }) { + $extra_sql .= ' AND '; + } else { + $extra_sql .= 'WHERE '; + } + $extra_sql .= "$field $in_matches"; + + my $addl_from = $fuzopts{addl_from}; + if ( $field =~ /^cust_location\./ ) { + $addl_from .= ' JOIN cust_location USING (custnum)'; + } elsif ( $field =~ /^contact\./ ) { + $addl_from .= ' JOIN contact USING (custnum)'; + } + + push @cust_main, qsearch({ + %fuzopts, + 'addl_from' => $addl_from, + 'extra_sql' => $extra_sql, + }); + } + + # we want the components of $fuzzy ANDed, not ORed, but still don't want dupes + my %saw = (); + @cust_main = grep { ++$saw{$_->custnum} == scalar(keys %$fuzzy) } @cust_main; + + return @cust_main; + } } =back