Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add raw filters #2

Merged
merged 3 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ HeaderFilterRegex: ''
FormatStyle: none

CheckOptions:
- key: readability-identifier-length.IgnoredVariableNames

- key: readability-identifier-length.IgnoredVariableNames
value: 'x|y|z'
- key: readability-identifier-length.IgnoredParameterNames
- key: readability-identifier-length.IgnoredParameterNames
value: 'x|y|z'
- key: bugprone-easily-swappable-parameters.MinimumLength
value: '3'
56 changes: 52 additions & 4 deletions include/sparser.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
#pragma once
#ifndef SPARSER_H_
#define SPARSER_H_

#include <algorithm>
#include <iostream>
#include <memory>
#include <string>
#include <string_view>
#include <vector>

// Length, in characters, of each raw-filter substring cut from a predicate value.
constexpr size_t kRfSize = 4;

// A single predicate of a query, held as its textual value.
struct Predicate {
// Text of the predicate; raw filters are generated as substrings of this string.
std::string value;
};
Expand All @@ -15,8 +22,49 @@ struct PredicateDisjunction {
std::vector<PredicateConjunction> conjunctions;
};

struct SparserQuery {
PredicateDisjunction disjunction;
// A query consisting of a single predicate disjunction, with helpers to expose
// and print it.
class SparserQuery {
 public:
  // Stores a copy of `disjunction` inside the query.
  explicit SparserQuery(const PredicateDisjunction& disjunction) : disjunction_(disjunction) {}

  // Read-only access to the disjunction held by this query.
  [[nodiscard]] const PredicateDisjunction& get_disjunction() const { return disjunction_; }

  // Textual rendering of the query (defined out of line).
  [[nodiscard]] std::string ToString() const;

  // Stream insertion for a query (defined out of line).
  friend std::ostream& operator<<(std::ostream& os, const SparserQuery& query);

 private:
  PredicateDisjunction disjunction_;
};

// Result of raw-filter generation, stored as three parallel vectors: entry i of
// each vector describes the same raw filter.
struct RawFilterData {
// Raw-filter substrings. These are non-owning views; GenerateRawFilters fills
// this with views into the predicate strings of the disjunction it was given,
// so that disjunction must outlive any use of these views.
std::vector<std::string_view> raw_filters;
// For each raw filter, the index of the conjunction its predicate belongs to.
std::vector<size_t> conjunctive_indices;
// For each raw filter, the index of its predicate within that conjunction.
std::vector<size_t> predicate_indices;
};

// A string view paired with a conjunctive index and a predicate index.
// The view is non-owning: the referenced characters must outlive the RawFilter.
class RawFilter {
 public:
  // Builds a filter from a view and the two indices it is tagged with.
  explicit RawFilter(const std::string_view& value, size_t conjunctive_index, size_t predicate_index)
      : value_(value), conjunctive_index_(conjunctive_index), predicate_index_(predicate_index) {}

  // Accessors for the three stored fields.
  [[nodiscard]] std::string_view get_value() const { return value_; }
  [[nodiscard]] size_t get_conjunctive_index() const { return conjunctive_index_; }
  [[nodiscard]] size_t get_predicate_index() const { return predicate_index_; }

  // Equality comparison (defined out of line).
  bool operator==(const RawFilter& other) const;

  // NOTE(review): no definition of generateOutput() is visible next to
  // ToString(); confirm it is defined somewhere or still needed.
  [[nodiscard]] std::string generateOutput() const;

  // Textual rendering of the filter (defined out of line).
  [[nodiscard]] std::string ToString() const;

  // Stream insertion for a filter (defined out of line).
  friend std::ostream& operator<<(std::ostream& os, const RawFilter& filter);

 private:
  std::string_view value_;
  size_t conjunctive_index_;
  size_t predicate_index_;
};

// Stateless collection of static helpers that turn predicates into raw filters.
class RawFilterQueryGenerator {
 public:
  // Generates the raw filters for every predicate of every conjunction in
  // `disjunction`, together with the conjunction/predicate index of each one.
  static RawFilterData GenerateRawFilters(const PredicateDisjunction& disjunction);

  // Generates the raw filters for a single predicate string.
  // The parameter is named `predicate` to match the out-of-line definition.
  static std::vector<std::string_view> GenerateRawFiltersFromPredicate(const std::string_view& predicate);
};

#endif // SPARSER_H_
54 changes: 51 additions & 3 deletions src/sparser/sparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
#include <sstream>
#include <string>

std::string SparserQuery::generateOutput() const {
std::string SparserQuery::ToString() const {
std::ostringstream oss;
for (const auto& conjunction : disjunction.conjunctions) {
for (const auto& conjunction : disjunction_.conjunctions) {
if (!conjunction.predicates.empty()) {
oss << "(";
}
Expand All @@ -18,10 +18,58 @@ std::string SparserQuery::generateOutput() const {
if (!conjunction.predicates.empty()) {
oss << ")";
}
if (&conjunction != &disjunction.conjunctions.back()) {
if (&conjunction != &disjunction_.conjunctions.back()) {
oss << " ∨ ";
}
}
oss << "\n";
return oss.str();
}

// Writes the textual form of `query` to `os` and returns the same stream.
std::ostream& operator<<(std::ostream& os, const SparserQuery& query) {
  return os << query.ToString();
}

// Two filters are equal when their view contents and both indices match.
bool RawFilter::operator==(const RawFilter& other) const {
  if (value_ != other.value_) {
    return false;
  }
  if (conjunctive_index_ != other.conjunctive_index_) {
    return false;
  }
  return predicate_index_ == other.predicate_index_;
}

// Renders the filter as "RawFilter(value: ..., conjunctiveIndex: ..., predicateIndex: ...)".
std::string RawFilter::ToString() const {
  std::ostringstream text;
  text << "RawFilter(value: " << value_;
  text << ", conjunctiveIndex: " << conjunctive_index_;
  text << ", predicateIndex: " << predicate_index_ << ")";
  return text.str();
}

// Writes the textual form of `filter` to `os` and returns the same stream.
std::ostream& operator<<(std::ostream& os, const RawFilter& filter) {
  return os << filter.ToString();
}

// Walks every predicate of every conjunction in `disjunction`, generates its
// raw filters, and records for each filter the index of its conjunction and the
// index of its predicate within that conjunction.
//
// The returned views point into the predicate strings owned by `disjunction`,
// so `disjunction` must outlive the returned data.
RawFilterData RawFilterQueryGenerator::GenerateRawFilters(const PredicateDisjunction& disjunction) {
  RawFilterData result;
  const auto& conjunctions = disjunction.conjunctions;
  for (size_t conj_idx = 0; conj_idx != conjunctions.size(); ++conj_idx) {
    const auto& predicates = conjunctions[conj_idx].predicates;
    for (size_t pred_idx = 0; pred_idx != predicates.size(); ++pred_idx) {
      const auto substrings = GenerateRawFiltersFromPredicate(predicates[pred_idx].value);
      // Append the substrings, then one (conjunction, predicate) index pair per
      // substring so the three vectors stay the same length.
      result.raw_filters.insert(result.raw_filters.end(), substrings.begin(), substrings.end());
      result.conjunctive_indices.insert(result.conjunctive_indices.end(), substrings.size(), conj_idx);
      result.predicate_indices.insert(result.predicate_indices.end(), substrings.size(), pred_idx);
    }
  }
  return result;
}

// Returns every contiguous substring of `predicate` that is exactly kRfSize
// characters long, in order of starting position.
//
// A predicate shorter than kRfSize has no such substring, so the result is
// empty. The explicit check is required: `predicate.size() - kRfSize + 1` is
// unsigned arithmetic and would wrap to a huge value for short inputs, sending
// the loop past the end of the view until substr() throws std::out_of_range.
//
// The returned views alias the characters behind `predicate`; they do not own
// them.
std::vector<std::string_view> RawFilterQueryGenerator::GenerateRawFiltersFromPredicate(
    const std::string_view& predicate) {
  std::vector<std::string_view> raw_filters;
  if (predicate.size() < kRfSize) {
    return raw_filters;
  }
  // Safe from underflow because size() >= kRfSize here.
  const size_t window_count = predicate.size() - kRfSize + 1;
  raw_filters.reserve(window_count);
  for (size_t start = 0; start < window_count; ++start) {
    raw_filters.emplace_back(predicate.substr(start, kRfSize));
  }
  return raw_filters;
}
78 changes: 67 additions & 11 deletions tests/sparser_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,77 @@
#include <gtest/gtest.h>

#include <string>
#include <string_view>
#include <vector>

TEST(SparserQueryTest, GenerateOutput) {
const Predicate pred1 = {"p1"};
const Predicate pred2 = {"p2"};
const Predicate pred3 = {"p3"};
const Predicate pred4 = {"p4"};
TEST(SparserQueryTest, ToString) {
const Predicate pred_1{"p1"};
const Predicate pred_2{"p2"};
const Predicate pred_3{"p3"};
const Predicate pred_4{"p4"};

const PredicateConjunction conj1 = {{pred1, pred2}};
const PredicateConjunction conj2 = {{pred3, pred4}};
const PredicateConjunction conj_1{{pred_1, pred_2}};
const PredicateConjunction conj_2{{pred_3, pred_4}};

const PredicateDisjunction disj = {{conj1, conj2}};
const SparserQuery query = {disj};
const PredicateDisjunction disj{{conj_1, conj_2}};
const SparserQuery query{disj};

const std::string expected = "(p1 ∧ p2) ∨ (p3 ∧ p4)\n";
auto actual = query.generateOutput();
const std::string expected{"(p1 ∧ p2) ∨ (p3 ∧ p4)\n"};
auto actual = query.ToString();

ASSERT_EQ(expected, actual);
}

// Builds a two-conjunction query and checks that GenerateRawFilters returns
// every 4-character window of each predicate, in order, together with the
// matching conjunction and predicate indices.
TEST(SparserQueryTest, GenerateRawFiltersForQueryTest) {
  const Predicate lotr{"Lord of the Rings"};
  const Predicate potter{"Harry Potter"};
  const Predicate hobbit{"The Hobbit"};

  const PredicateConjunction first_conjunction{{lotr, potter}};
  const PredicateConjunction second_conjunction{{hobbit}};
  const PredicateDisjunction disjunction{{first_conjunction, second_conjunction}};
  const SparserQuery query{disjunction};

  const std::vector<std::string_view> want_filters{
      // "Lord of the Rings"
      "Lord", "ord ", "rd o", "d of", " of ", "of t", "f th", " the", "the ", "he R", "e Ri", " Rin", "Ring", "ings",
      // "Harry Potter"
      "Harr", "arry", "rry ", "ry P", "y Po", " Pot", "Pott", "otte", "tter",
      // "The Hobbit"
      "The ", "he H", "e Ho", " Hob", "Hobb", "obbi", "bbit"};

  const std::vector<size_t> want_conjunctive_indices{
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // first conjunction, first predicate
      0, 0, 0, 0, 0, 0, 0, 0, 0,                 // first conjunction, second predicate
      1, 1, 1, 1, 1, 1, 1};                      // second conjunction

  const std::vector<size_t> want_predicate_indices{
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // first conjunction, first predicate
      1, 1, 1, 1, 1, 1, 1, 1, 1,                 // first conjunction, second predicate
      0, 0, 0, 0, 0, 0, 0};                      // second conjunction

  const auto got = RawFilterQueryGenerator::GenerateRawFilters(query.get_disjunction());

  ASSERT_EQ(want_filters.size(), got.raw_filters.size());
  for (size_t idx = 0; idx < want_filters.size(); ++idx) {
    ASSERT_EQ(want_filters[idx], got.raw_filters[idx]);
  }

  ASSERT_EQ(want_conjunctive_indices.size(), got.conjunctive_indices.size());
  for (size_t idx = 0; idx < want_conjunctive_indices.size(); ++idx) {
    ASSERT_EQ(want_conjunctive_indices[idx], got.conjunctive_indices[idx]);
  }

  ASSERT_EQ(want_predicate_indices.size(), got.predicate_indices.size());
  for (size_t idx = 0; idx < want_predicate_indices.size(); ++idx) {
    ASSERT_EQ(want_predicate_indices[idx], got.predicate_indices[idx]);
  }
}

// Checks that a single predicate yields each of its 4-character windows in order.
TEST(SparserQueryTest, GenerateRawFiltersForSinglePredicate) {
  const Predicate potter{"Harry Potter"};

  const std::vector<std::string_view> want{
      "Harr", "arry", "rry ", "ry P", "y Po", " Pot", "Pott", "otte", "tter",
  };

  const auto got = RawFilterQueryGenerator::GenerateRawFiltersFromPredicate(potter.value);

  ASSERT_EQ(want.size(), got.size());
  for (size_t idx = 0; idx < want.size(); ++idx) {
    ASSERT_EQ(want[idx], got[idx]);
  }
}