Skip to content
This repository has been archived by the owner on Dec 4, 2023. It is now read-only.

Commit

Permalink
fuzzy match in SQL and align with existing C++ matcher
Browse files Browse the repository at this point in the history
  • Loading branch information
aakropotkin committed Aug 23, 2023
1 parent d096dc4 commit 215dde5
Show file tree
Hide file tree
Showing 5 changed files with 243 additions and 49 deletions.
23 changes: 18 additions & 5 deletions include/flox/pkgdb/read.hh
Original file line number Diff line number Diff line change
Expand Up @@ -252,14 +252,27 @@ class PkgDbReadOnly {

}; /* End class `PkgDbReadOnly' */


/* -------------------------------------------------------------------------- */

/**
 * Ranks how strongly a package matches a search string.
 * Lower values are stronger matches, so an ascending sort lists the best
 * matches first.
 * The numeric values are hard-coded in the SQL emitted by `buildPkgQuery'
 * and must be kept in sync with it.
 */
enum match_strength {
  MS_EXACT_PNAME        = 0  /**< `pname' is exactly the match string. */
, MS_PARTIAL_PNAME_DESC = 1  /**< `pname' and description both contain it. */
, MS_PARTIAL_PNAME      = 2  /**< Only `pname' contains it. */
, MS_PARTIAL_DESC       = 3  /**< Only the description contains it. */
, MS_NONE               = 4  /**< Nothing matches, or the match string is
                              *   empty. Ensure this is always the highest. */
};

/**
* Calculate a distance that can be used to order packages by how close they
* are to a match string.
* @param Package The Package to judge distance from.
* Calculate a "strength" ranking that can be used to order packages by how
* closely they match a string.
* @param pkg The Package to be matched against.
* @param match String to look for in Package's fields.
* @return Distance between pkg and match.
* @return _match strength_ of @a match for @a pkg.
*/
std::optional<size_t> distanceFromMatch( Package & pkg, std::string match );
match_strength distanceFromMatch( const Package & pkg
, std::string_view match
);

/* -------------------------------------------------------------------------- */

Expand Down
31 changes: 27 additions & 4 deletions src/pkgdb/query-builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ buildPkgQuery( const PkgQueryArgs & params )
}

SelectModel q;
q.select( "id" ).from( "v_PackagesSearch" );
q.select( "id" ).select( "semver" ).from( "v_PackagesSearch" );
std::unordered_map<std::string, std::string> binds;

if ( params.name.has_value() )
Expand All @@ -175,16 +175,40 @@ buildPkgQuery( const PkgQueryArgs & params )
binds.emplace( ":pname", * params.pname );
}

if ( params.match.has_value() && !params.match->empty() )
if ( params.match.has_value() && ( ! params.match->empty() ) )
{
q.where( "( name LIKE '%:match%' ) OR ( description LIKE '%:match%' )" );
q.where(
"( ( pname LIKE :match ) OR ( description LIKE :match ) )"
);
/* XXX: These values must align with `match_strength'.
* While we could use `bind' or `fmt' here, hard-coding them is fine -
* these are explicitly audited by the test suite. */
q.select( R"SQL(
iif( ( ( '%' || pname || '%' ) = :match )
, 0
, iif( ( pname LIKE :match )
, iif( ( description LIKE :match ), 1, 2 )
, 3
)
) AS matchStrength
)SQL" );
binds.emplace( ":match", "%" + ( * params.match ) + "%" );
q.order_by( "matchStrength" );
}
else
{
q.select( "NULL AS matchStrength" );
}

if ( params.version.has_value() )
{
q.where( column( "version" ) == Param( ":version" ) );
binds.emplace( ":version", * params.version );
}
else if ( params.semver.has_value() )
{
q.where( column( "semver" ).is_not_null() );
}

if ( params.licenses.has_value() && ( ! params.licenses->empty() ) )
{
Expand Down Expand Up @@ -249,7 +273,6 @@ buildPkgQuery( const PkgQueryArgs & params )
}
}

// TODO: Match
// TODO: Semver and pre-releases
// TODO: Sort/"order by" results

Expand Down
57 changes: 29 additions & 28 deletions src/pkgdb/read.cc
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,6 @@ PkgDbReadOnly::getPackagePath( row_id id )

/* -------------------------------------------------------------------------- */


std::vector<row_id>
PkgDbReadOnly::getDescendantAttrSets( row_id root )
{
Expand Down Expand Up @@ -321,45 +320,47 @@ PkgDbReadOnly::getDescendantAttrSets( row_id root )

/* -------------------------------------------------------------------------- */

std::optional<size_t>
distanceFromMatch( Package & pkg, std::string match )
match_strength
distanceFromMatch( const Package & pkg, std::string_view match )
{
if ( match.empty() ) { return std::nullopt; }
if ( match.empty() ) { return MS_NONE; }

std::string pname = pkg.getPname();
// TODO match on attrName. That's not currently possible because attrName is
// meaningful for flakes, but for catalogs, only the attrName of parent is
// meaningful (attrName is a version string).
/* TODO match on attrName. That's not currently possible because attrName is
* meaningful for flakes, but for catalogs, only the attrName of parent is
* meaningful (attrName is a version string). */

// Don't give description any weight if pname matches exactly. It's not
// especially meaningful if a description mentions its own name.
/* Don't give description any weight if pname matches exactly.
* It's not especially meaningful if a description mentions its own name. */
if ( pname == match )
{
// pname matches exactly
return 0;
/* pname matches exactly */
return MS_EXACT_PNAME;
}

bool pnameMatches = (pname.find(match) != std::string::npos);
auto description = pkg.getDescription();
bool descriptionMatches = (description.has_value() && description->find(match) != std::string::npos);

if ( pnameMatches ) {
if ( descriptionMatches )
{
// pname and description match
return 1;
}
// only pname matches
return 2;
}
auto description = pkg.getDescription();
bool descriptionMatches = description.has_value() &&
( description->find( match ) != std::string::npos );

if ( pname.find( match ) != std::string::npos )
{
if ( descriptionMatches )
{
/* pname and description match */
return MS_PARTIAL_PNAME_DESC;
}
/* only pname matches */
return MS_PARTIAL_PNAME;
}

if ( descriptionMatches )
{
// only description matches
return 3;
/* only description matches */
return MS_PARTIAL_DESC;
}
// nothing matches
return 4;

/* nothing matches */
return MS_NONE;
}

/* -------------------------------------------------------------------------- */
Expand Down
159 changes: 158 additions & 1 deletion tests/pkgdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ test_buildPkgQuery1( flox::pkgdb::PkgDb & db )
{
throw flox::pkgdb::PkgDbException(
db.dbPath
, nix::fmt( "Failed to write Package 'hello':(%d) %s"
, nix::fmt( "Failed to write Packages:(%d) %s"
, rc
, db.db.error_msg()
)
Expand Down Expand Up @@ -589,6 +589,162 @@ test_buildPkgQuery1( flox::pkgdb::PkgDb & db )
}


/* -------------------------------------------------------------------------- */

/**
 * Tests `match' filtering and the `matchStrength' ranking column produced by
 * `flox::pkgdb::buildPkgQuery'.
 * @param db Database to populate and query; its tables are cleared first.
 * @return `true' when every expectation holds.
 */
bool
test_buildPkgQuery2( flox::pkgdb::PkgDb & db )
{
  clearTables( db );

  /* Make four packages under `legacyPackages.x86_64-linux':
   * `hello' and `hola' share a description that mentions "hello", while
   * `goodbye' and `ciao' share one that mentions "farewell".
   * NOTE: the attrset id is deliberately not named `linux'; GNU dialects
   * ( `-std=gnu++NN' ) predefine `linux' as a macro on Linux targets. */
  row_id linuxId =
    db.addOrGetAttrSetId( flox::AttrPath { "legacyPackages", "x86_64-linux" } );
  row_id descGreet =
    db.addOrGetDescriptionId( "A program with a friendly hello" );
  row_id descFarewell =
    db.addOrGetDescriptionId( "A program with a friendly farewell" );
  sqlite3pp::command cmd( db.db, R"SQL(
    INSERT INTO Packages (
      parentId, attrName, name, pname, outputs, descriptionId
    ) VALUES
      ( :parentId, 'aHello', 'hello-2.12.1', 'hello', '["out"]', :descGreetId
      )
    , ( :parentId, 'aGoodbye', 'goodbye-2.12.1', 'goodbye'
      , '["out"]', :descFarewellId
      )
    , ( :parentId, 'aHola', 'hola-2.12.1', 'hola', '["out"]', :descGreetId
      )
    , ( :parentId, 'aCiao', 'ciao-2.12.1', 'ciao', '["out"]', :descFarewellId
      )
  )SQL" );
  cmd.bind( ":parentId",       static_cast<long long>( linuxId ) );
  cmd.bind( ":descGreetId",    static_cast<long long>( descGreet ) );
  cmd.bind( ":descFarewellId", static_cast<long long>( descFarewell ) );
  if ( flox::pkgdb::sql_rc rc = cmd.execute_all();
       flox::pkgdb::isSQLError( rc )
     )
    {
      throw flox::pkgdb::PkgDbException(
        db.dbPath
      , nix::fmt( "Failed to write Packages:(%d) %s"
                , rc
                , db.db.error_msg()
                )
      );
    }

  flox::pkgdb::PkgQueryArgs qargs = {
    .match             = std::nullopt
  , .name              = std::nullopt
  , .pname             = std::nullopt
  , .version           = std::nullopt
  , .semver            = std::nullopt
  , .licenses          = std::nullopt
  , .allowBroken       = false
  , .allowUnfree       = true
  , .preferPreReleases = false
  , .subtrees          = std::nullopt
  , .systems           = std::vector<std::string> { "x86_64-linux" }
  , .stabilities       = std::nullopt
  };

  /* Build the query for `match', bind its parameters, run it, and collect the
   * `matchStrength' column ( index 2, after `id' and `semver' ) of every row
   * in result order.
   * `qargs.match' is reset right after the query is built so that `qargs'
   * never carries a stale match string.
   * No `EXPECT_*' macros are used in here: they must stay at function scope
   * so that a failure is reported from the test itself. */
  auto runMatchQuery =
    [&]( const char * match ) -> std::vector<flox::pkgdb::match_strength>
    {
      qargs.match = match;
      auto [query, binds] = flox::pkgdb::buildPkgQuery( qargs );
      qargs.match = std::nullopt;

      sqlite3pp::query qry( db.db, query.c_str() );
      for ( const auto & [var, val] : binds )
        {
          qry.bind( var.c_str(), val, sqlite3pp::copy );
        }

      std::vector<flox::pkgdb::match_strength> strengths;
      for ( const auto row : qry )
        {
          strengths.push_back(
            static_cast<flox::pkgdb::match_strength>( row.get<int>( 2 ) )
          );
        }
      return strengths;
    };

  /* Run `match = "hello"' query:
   * the exact `pname' hit must sort ahead of description-only hits. */
  {
    const auto strengths = runMatchQuery( "hello" );
    for ( size_t idx = 0; idx < strengths.size(); ++idx )
      {
        if ( idx == 0 )
          {
            EXPECT_EQ( strengths[idx], flox::pkgdb::MS_EXACT_PNAME );
          }
        else
          {
            EXPECT_EQ( strengths[idx], flox::pkgdb::MS_PARTIAL_DESC );
          }
      }
    EXPECT_EQ( strengths.size(), static_cast<size_t>( 2 ) );
  }

  /* Run `match = "farewell"' query:
   * no `pname' contains it, so every hit is description-only. */
  {
    const auto strengths = runMatchQuery( "farewell" );
    for ( const auto strength : strengths )
      {
        EXPECT_EQ( strength, flox::pkgdb::MS_PARTIAL_DESC );
      }
    EXPECT_EQ( strengths.size(), static_cast<size_t>( 2 ) );
  }

  /* Run `match = "hel"' query:
   * `hello' matches on both `pname' and description and must sort first;
   * the remaining hit matches on description only. */
  {
    const auto strengths = runMatchQuery( "hel" );
    for ( size_t idx = 0; idx < strengths.size(); ++idx )
      {
        if ( idx == 0 )
          {
            EXPECT_EQ( strengths[idx], flox::pkgdb::MS_PARTIAL_PNAME_DESC );
          }
        else
          {
            EXPECT_EQ( strengths[idx], flox::pkgdb::MS_PARTIAL_DESC );
          }
      }
    EXPECT_EQ( strengths.size(), static_cast<size_t>( 2 ) );
  }

  /* Run `match = "xxxxx"' query: nothing matches, so no rows are returned. */
  {
    const auto strengths = runMatchQuery( "xxxxx" );
    EXPECT_EQ( strengths.size(), static_cast<size_t>( 0 ) );
  }

  return true;
}


/* ========================================================================== */

int
Expand Down Expand Up @@ -656,6 +812,7 @@ main( int argc, char * argv[] )

RUN_TEST( buildPkgQuery0, db );
RUN_TEST( buildPkgQuery1, db );
RUN_TEST( buildPkgQuery2, db );

}

Expand Down
22 changes: 11 additions & 11 deletions tests/read.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ using namespace flox;
test_distanceFromMatch()
{
std::tuple<char const*, char const*, size_t> cases[] = {
{ "match", "match", 0 }
, { "match", "partial match", 0 }
, { "match", "miss", 0 }
, { "partial match", "match", 1 }
, { "partial match", "partial match", 1 }
, { "partial match", "miss", 2 }
, { "miss", "match", 3 }
, { "miss", "partial match", 3 }
, { "miss", "miss", 4 }
{ "match", "match", flox::pkgdb::MS_EXACT_PNAME }
, { "match", "partial match", flox::pkgdb::MS_EXACT_PNAME }
, { "match", "miss", flox::pkgdb::MS_EXACT_PNAME }
, { "partial match", "match", flox::pkgdb::MS_PARTIAL_PNAME_DESC }
, { "partial match", "partial match", flox::pkgdb::MS_PARTIAL_PNAME_DESC }
, { "partial match", "miss", flox::pkgdb::MS_PARTIAL_PNAME }
, { "miss", "match", flox::pkgdb::MS_PARTIAL_DESC }
, { "miss", "partial match", flox::pkgdb::MS_PARTIAL_DESC }
, { "miss", "miss", flox::pkgdb::MS_NONE }
};

RawPackage pkg;
Expand All @@ -48,11 +48,11 @@ test_distanceFromMatch()
, { "pname", pname }
, { "description", description }
} );
EXPECT_EQ( * pkgdb::distanceFromMatch( pkg, "match"), distance );
EXPECT_EQ( pkgdb::distanceFromMatch( pkg, "match"), distance );
}

/* Should return `MS_NONE' for empty match string. */
EXPECT( pkgdb::distanceFromMatch( pkg, "" ) == std::nullopt );
EXPECT( pkgdb::distanceFromMatch( pkg, "" ) == flox::pkgdb::MS_NONE );
return true;
}

Expand Down

0 comments on commit 215dde5

Please sign in to comment.