Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(shannon): selective-offload-est-query-cost #306

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 23 additions & 26 deletions mysql-test/suite/secondary_engine/r/query_preparation.result
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ x
234
SHOW SESSION STATUS LIKE 'Secondary_engine_execution_count';
Variable_name Value
Secondary_engine_execution_count 1
Secondary_engine_execution_count 0
DROP FUNCTION f1;
DROP FUNCTION f2;
DROP TABLE tf;
Expand Down Expand Up @@ -637,15 +637,15 @@ a
INSERT INTO t1 SELECT * FROM t2;
SHOW SESSION STATUS LIKE 'Secondary_engine_execution_count';
Variable_name Value
Secondary_engine_execution_count 2
Secondary_engine_execution_count 1
# An error shall be returned if table is not loaded.
ALTER TABLE t1 SECONDARY_UNLOAD;
SET @@use_secondary_engine = FORCED;
SELECT * FROM t1;
Got one of the listed errors
SHOW SESSION STATUS LIKE 'Secondary_engine_execution_count';
Variable_name Value
Secondary_engine_execution_count 2
Secondary_engine_execution_count 1
SET @@use_secondary_engine = ON;
SELECT * FROM t1;
a
Expand All @@ -654,7 +654,7 @@ a
1
SHOW SESSION STATUS LIKE 'Secondary_engine_execution_count';
Variable_name Value
Secondary_engine_execution_count 2
Secondary_engine_execution_count 1
DROP TABLE t1;
DROP TABLE t2;
SET @@use_secondary_engine = @my_use_secondary_engine;
Expand Down Expand Up @@ -774,27 +774,27 @@ EXECUTE ps;
a
SHOW SESSION STATUS LIKE 'Secondary_engine_execution_count';
Variable_name Value
Secondary_engine_execution_count 1
Secondary_engine_execution_count 0
SET @@use_secondary_engine = FORCED;
EXECUTE ps;
a
SHOW SESSION STATUS LIKE 'Secondary_engine_execution_count';
Variable_name Value
Secondary_engine_execution_count 2
Secondary_engine_execution_count 1
# Unload table from secondary engine.
ALTER TABLE t1 SECONDARY_UNLOAD;
SET @@use_secondary_engine = ON;
EXECUTE ps;
a
SHOW SESSION STATUS LIKE 'Secondary_engine_execution_count';
Variable_name Value
Secondary_engine_execution_count 2
Secondary_engine_execution_count 1
SET @@use_secondary_engine = FORCED;
EXECUTE ps;
a
SHOW SESSION STATUS LIKE 'Secondary_engine_execution_count';
Variable_name Value
Secondary_engine_execution_count 2
Secondary_engine_execution_count 1
DROP PREPARE ps;
DROP TABLE t1;
SET @@use_secondary_engine = @my_use_secondary_engine;
Expand Down Expand Up @@ -830,13 +830,13 @@ a
SET @@use_secondary_engine = @my_use_secondary_engine;
SHOW SESSION STATUS LIKE 'Secondary_engine_execution_count';
Variable_name Value
Secondary_engine_execution_count 2
Secondary_engine_execution_count 1
SELECT /*+ SET_VAR(use_secondary_engine = OFF) */ * FROM t1;
a
1
SHOW SESSION STATUS LIKE 'Secondary_engine_execution_count';
Variable_name Value
Secondary_engine_execution_count 2
Secondary_engine_execution_count 1
DROP TABLE t1;
SET @@use_secondary_engine = @my_use_secondary_engine;
#
Expand All @@ -848,9 +848,9 @@ ALTER TABLE t SECONDARY_LOAD;
SELECT * FROM t;
x
FLUSH STATUS;
SELECT 1;
1
1
SELECT 0;
0
0
DROP TABLE t;
#
# Load only a subset of columns into secondary engine.
Expand Down Expand Up @@ -1046,7 +1046,7 @@ a b
1 1
SHOW SESSION STATUS LIKE 'Secondary_engine_execution_count';
Variable_name Value
Secondary_engine_execution_count 2
Secondary_engine_execution_count 0
DROP TABLE t1;
#
# Use NOT SECONDARY with other constraint on column.
Expand All @@ -1064,7 +1064,7 @@ SELECT /*+ SET_VAR(use_secondary_engine = FORCED) */ b FROM t1;
ERROR HY000: Secondary engine operation failed. One or more read columns are marked as NOT SECONDARY.
SHOW SESSION STATUS LIKE 'Secondary_engine_execution_count';
Variable_name Value
Secondary_engine_execution_count 1
Secondary_engine_execution_count 0
DROP TABLE t1;
# Add NON SECONDARY and do not use SECONDARY_ENGINE AT CREATE
CREATE TABLE t1 (a INT NOT SECONDARY, b INT);
Expand Down Expand Up @@ -1092,11 +1092,8 @@ Table Op Msg_type Msg_text
test.t analyze status OK
EXPLAIN FORMAT=TREE SELECT * FROM t WHERE a IS NULL;
EXPLAIN
-> Filter: (t.a = <cache>(last_insert_id())) (rows=1)
-> Table scan on t in secondary engine Rapid (rows=1)
-> Rows fetched before execution (rows=1)

Warnings:
Note 1003 Query is executed in secondary engine; the actual query plan may diverge from the printed one
DROP TABLE t;
SET @@sql_auto_is_null = @saved_sql_auto_is_null;
# Bug#29288023: Join condition is substituted with REF access
Expand Down Expand Up @@ -1295,7 +1292,7 @@ SELECT count_star, count_secondary, sum_select_scan
FROM performance_schema.events_statements_summary_by_digest
WHERE digest_text LIKE 'SELECT %';
count_star count_secondary sum_select_scan
2 1 2
3 1 2
EXECUTE ps;
x
1
Expand All @@ -1314,7 +1311,7 @@ SELECT execution_engine, count_execute, count_secondary
FROM performance_schema.prepared_statements_instances
WHERE statement_name = 'ps';
execution_engine count_execute count_secondary
SECONDARY 3 1
PRIMARY 3 0
SET @saved_use_secondary_engine = @@use_secondary_engine;
SET use_secondary_engine = OFF;
EXECUTE ps;
Expand All @@ -1336,7 +1333,7 @@ SELECT execution_engine, count_execute, count_secondary
FROM performance_schema.prepared_statements_instances
WHERE statement_name = 'ps';
execution_engine count_execute count_secondary
SECONDARY 4 2
PRIMARY 4 0
SET @saved_use_secondary_engine = @@use_secondary_engine;
SET use_secondary_engine = FORCED;
EXECUTE ps;
Expand All @@ -1358,7 +1355,7 @@ SELECT execution_engine, count_execute, count_secondary
FROM performance_schema.prepared_statements_instances
WHERE statement_name = 'ps';
execution_engine count_execute count_secondary
SECONDARY 5 3
PRIMARY 5 0
CREATE PROCEDURE p() EXECUTE ps;
CALL p();
x
Expand All @@ -1374,12 +1371,12 @@ SELECT execution_engine, count_execute, count_secondary
FROM performance_schema.prepared_statements_instances
WHERE statement_name = 'ps';
execution_engine count_execute count_secondary
SECONDARY 6 4
PRIMARY 6 0
DROP PREPARE ps;
FLUSH STATUS;
CREATE TABLE tt AS SELECT * FROM t;
SHOW STATUS LIKE 'Select_scan';
Variable_name Value
Select_scan 2
DROP TABLE tt;
DROP TABLE t;
DROP TABLE IF EXISTS tt;
DROP TABLE IF EXISTS t;
4 changes: 2 additions & 2 deletions mysql-test/suite/secondary_engine/t/query_preparation.test
Original file line number Diff line number Diff line change
Expand Up @@ -944,6 +944,6 @@ DROP PREPARE ps;
FLUSH STATUS;
CREATE TABLE tt AS SELECT * FROM t;
SHOW STATUS LIKE 'Select_scan';
DROP TABLE tt;
DROP TABLE IF EXISTS tt;

DROP TABLE t;
DROP TABLE IF EXISTS t;
27 changes: 7 additions & 20 deletions sql/sql_class.cc
Original file line number Diff line number Diff line change
Expand Up @@ -631,42 +631,29 @@ void Open_tables_state::reset_open_tables_state() {
// Caches all info needed by the secondary engine at the RapidPrepareEstimateQueryCosts stage.
void Secondary_engine_statement_context::cache_primary_plan_info(THD* thd, JOIN* join) {
m_primary_cost = thd->m_current_query_cost;
m_secondary_cost_threshold = thd->variables.secondary_engine_cost_threshold;

m_primary_plan = join;

m_count_all_base_tables = thd->lex->unit->first_query_block()->leaf_table_count;
// If it is a SELECT query that involves 3 or more tables, it is treated as a complex query; otherwise it is not.
m_complex_query =
(thd->lex->sql_command == SQLCOM_SELECT &&
thd->lex->unit->first_query_block()->leaf_table_count >= 3) ? true : false;
double total_data_size{0};
bool all_tables_loaded {true}, still_populating {false};
m_complex_query =
(thd->lex->sql_command == SQLCOM_SELECT && m_count_all_base_tables >= 3) ? true : false;

if(thd->lex->using_hypergraph_optimizer) {

} else {
for (size_t i = join->const_tables; i < join->tables; ++i) {
Table_ref* tab_ref = join->qep_tab[i].table_ref;
if (!tab_ref) continue;

if (!tab_ref->table->s->secondary_load) {
all_tables_loaded &= false;
}

std::ostringstream db_tbl;
db_tbl << tab_ref->db << ":" << tab_ref->table_name;
m_tables.emplace_back(db_tbl.str());
total_data_size =
(const_cast<Table_ref*>(tab_ref)->fetch_number_of_rows() * tab_ref->table->s->reclength) / 1024;
m_tables.emplace_back(tab_ref);
m_base_table_rows += (const_cast<Table_ref*>(tab_ref)->fetch_number_of_rows());
}
}

m_data_ready = all_tables_loaded && !still_populating;
//if table size is large than 1GB, means it's a large table.
m_large_table = (int)(total_data_size / (1024 * 1024)) > 1 ? true : false;
m_query_type =
(thd->lex->unit->first_query_block()->olap == ROLLUP_TYPE) ? QUERY_TYPE::OLAP : QUERY_TYPE::OLTP;

auto root_access_path = thd->lex->unit->root_access_path();
assert (root_access_path);
//join->row_limit == 1;
}

Expand Down
42 changes: 25 additions & 17 deletions sql/sql_class.h
Original file line number Diff line number Diff line change
Expand Up @@ -944,6 +944,11 @@ class Secondary_engine_statement_context {
may override the destructor in subclasses and add code that
performs cleanup tasks that are needed after query execution.
*/
enum class QUERY_TYPE : int8 {
OLTP,
OLAP
};

virtual ~Secondary_engine_statement_context() = default;
virtual bool is_primary_engine_optimal() const { return true; }

Expand All @@ -952,30 +957,33 @@ class Secondary_engine_statement_context {
virtual JOIN* get_cached_primary_plan_info() const {
return m_primary_plan;
}

double get_primary_cost() const { return m_primary_cost; }
double get_secondary_cost_threshold() const { return m_secondary_cost_threshold; }
std::vector<std::string>& get_query_tables() { return m_tables; }
enum class QUERY_TYPE : int8 {
OLTP,
OLAP
};

uint get_count_base_table() const { return m_count_all_base_tables; }

QUERY_TYPE get_query_type() const { return m_query_type; }

std::vector<Table_ref*>& get_query_tables() { return m_tables; }
private:
// query plan on primary engine.
JOIN* m_primary_plan {nullptr};
// query type: OLTP or OLAP.
QUERY_TYPE m_query_type {QUERY_TYPE::OLTP};
//all tables used in query, gets from qep_tab.
std::vector<Table_ref*> m_tables;
//the # of base table used in statement.
uint m_count_all_base_tables{0};
// cost on primary engine.
double m_primary_cost {0};
// cost threshold used to determine whether the query goes to the secondary engine or not.
double m_secondary_cost_threshold {0};
// query plan on primary engine.
JOIN* m_primary_plan {nullptr};
// complex query(multi-table join) or not.
//# of references to index table scans.
uint m_count_ref_index_ts{0};
//the # of rows of all base tables.
uint m_base_table_rows {0};
//whether all tables use index table scan.
bool are_all_ts_index_ref {false};
//is a complex query or not.
bool m_complex_query {false};
// the data of table are loaded in, not in change pop queue or applying.
bool m_data_ready {false};
// large table or not.
bool m_large_table {false};
//all tables used in query.
std::vector<std::string> m_tables;
};

/**
Expand Down
47 changes: 28 additions & 19 deletions storage/rapid_engine/handler/ha_shannon_rapid.cc
Original file line number Diff line number Diff line change
Expand Up @@ -538,16 +538,14 @@ static void rapid_kill_connection(handlerton *hton, /*!< in: innobase handlerto
}
}

void SetSecondaryEngineOffloadFailedReason(THD *thd, const char *msg) {
static inline void SetSecondaryEngineOffloadFailedReason(THD *thd, const char *msg) {
assert(thd);
thd->lex->m_secondary_engine_offload_or_exec_failed_reason.clear();
thd->lex->m_secondary_engine_offload_or_exec_failed_reason.append(msg);
thd->lex->m_secondary_engine_offload_or_exec_failed_reason = msg;
my_error(ER_SECONDARY_ENGINE, MYF(0), msg);
}

// Returns a C string view of the text currently stored in
// thd->lex->m_secondary_engine_offload_or_exec_failed_reason.
// `thd` must be non-null (checked with assert).
// NOTE(review): the pointer comes from c_str() on the LEX member, so it is
// presumably only valid until that member is next modified — confirm.
const char *GetSecondaryEngineOffloadorExecFailedReason(THD *thd) {
  assert(thd);
  auto &failed_reason = thd->lex->m_secondary_engine_offload_or_exec_failed_reason;
  return failed_reason.c_str();
}

Expand All @@ -571,21 +569,15 @@ static bool RapidPrepareEstimateQueryCosts(THD *thd, LEX *lex) {
if (thd->variables.use_secondary_engine == SECONDARY_ENGINE_OFF) {
SetSecondaryEngineOffloadFailedReason(thd, "use_secondary_engine set to off.");
return true;
}

else if (thd->variables.use_secondary_engine == SECONDARY_ENGINE_FORCED)
return false;

// gets the shannon statement context from thd, which stores in SecondaryEnginePrePrepareHook.
if (!thd->variables.rapid_use_dynamic_offload) {
} else if (thd->variables.use_secondary_engine == SECONDARY_ENGINE_FORCED)
return false;
}

auto shannon_statement_context = thd->secondary_engine_statement_context();
auto primary_plan_info = shannon_statement_context->get_cached_primary_plan_info();

// 1: to check whether the sys_pop_data_sz has too many data to populate.
uint64 too_much_pop_threshold = static_cast<uint64_t>(0.8 * ShannonBase::rpd_pop_buff_sz_max);
uint64 too_much_pop_threshold =
static_cast<uint64_t>(ShannonBase::SHANNON_TO_MUCH_POP_THRESHOLD_RATIO * ShannonBase::rpd_pop_buff_sz_max);
if (ShannonBase::Populate::sys_pop_data_sz > too_much_pop_threshold) {
SetSecondaryEngineOffloadFailedReason(thd, "too much changes need to populate.");
return true;
Expand All @@ -595,11 +587,17 @@ static bool RapidPrepareEstimateQueryCosts(THD *thd, LEX *lex) {
// If any table is still being populated, go to InnoDB; also gets the cardinality of the tables.
for (uint i = primary_plan_info->tables; i < primary_plan_info->tables; i++) {
std::string db_tb;
if (ShannonBase::Populate::Populator::check_population_status(db_tb)) return true;
if (ShannonBase::Populate::Populator::check_population_status(db_tb)) {
SetSecondaryEngineOffloadFailedReason(thd, "table queried is populating.");
return true;
}
}

// 3: checks dict encoding projection, and varlen project size, etc.

if (ShannonBase::Utils::Util::check_dict_encoding_projection(thd)) {
SetSecondaryEngineOffloadFailedReason(thd, "dict encoding is not supported.");
return true;
}
return false;
}

Expand Down Expand Up @@ -701,16 +699,27 @@ static void AssertSupportedPath(const AccessPath *path) {
// In this function, Dynamic offload retrieves info from
// rapid_statement_context and additionally looks at Change
// propagation lag to decide if query should be offloaded to rapid
// returns true, goes to innodb engine.
// return false, goes to secondary engine.
// returns true, goes to innodb engine. otherwise, false, goes to secondary engine.
static bool RapidOptimize(THD *thd [[maybe_unused]], LEX *lex) {
if (thd->variables.use_secondary_engine == SECONDARY_ENGINE_OFF)
if (thd->variables.use_secondary_engine == SECONDARY_ENGINE_OFF) {
SetSecondaryEngineOffloadFailedReason(thd, "in RapidOptimize, set use_secondary_engine to false.");
return true;
else if (thd->variables.use_secondary_engine == SECONDARY_ENGINE_FORCED)
} else if (thd->variables.use_secondary_engine == SECONDARY_ENGINE_FORCED)
return false;

// auto statement_context = thd->secondary_engine_statement_context();
// Too many changes are waiting to be populated, so go to the primary engine.
ulonglong too_much_pop_threshold =
static_cast<ulonglong>(ShannonBase::SHANNON_TO_MUCH_POP_THRESHOLD_RATIO * ShannonBase::rpd_pop_buff_sz_max);
if (ShannonBase::Populate::sys_pop_buff.size() > 1000 ||
ShannonBase::Populate::sys_pop_data_sz > too_much_pop_threshold) {
SetSecondaryEngineOffloadFailedReason(thd, "in RapidOptimize, the CP lag is too much.");
return true;
}

return false;
}

static bool OptimizeSecondaryEngine(THD *thd [[maybe_unused]], LEX *lex) {
// The context should have been set by PrepareSecondaryEngine.
assert(lex->secondary_engine_execution_context() != nullptr);
Expand Down
Loading