Skip to content

Commit

Permalink
fix(broker/sql): two issues in the mysql object
Browse files Browse the repository at this point in the history
* a possible segfault fixed.
* an issue on errors raised by mariadb that can have errno=0

These issues are due to a change in MariaDB: a statement execution can now fail while errno is still 0.

REFS: MON-153670
  • Loading branch information
bouda1 committed Nov 22, 2024
1 parent 8a19cde commit 62b6b51
Show file tree
Hide file tree
Showing 6 changed files with 270 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/package-collect.yml
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ jobs:
run: rm -rf *-debuginfo*.${{ matrix.package_extension }}

# set condition to true if artifacts are needed
- if: ${{ false }}
- if: ${{ true }}
name: Upload package artifacts
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
with:
Expand Down
27 changes: 18 additions & 9 deletions broker/core/sql/src/mysql_connection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/

#include <errmsg.h>
#include <mysqld_error.h>

#include "com/centreon/broker/config/applier/init.hh"
#include "com/centreon/broker/log_v2.hh"
Expand Down Expand Up @@ -464,18 +465,26 @@ void mysql_connection::_statement(mysql_task* t) {
"mysql_connection {:p}: execute statement {} attempt {}: {}",
static_cast<const void*>(this), task->statement_id, attempts, query);
if (mysql_stmt_execute(stmt)) {
std::string err_msg(
fmt::format("{} errno={} {}", mysql_error::msg[task->error_code],
::mysql_errno(_conn), ::mysql_stmt_error(stmt)));
SPDLOG_LOGGER_ERROR(log_v2::sql(),
"connection fail to execute statement {:p}: {}",
static_cast<const void*>(this), err_msg);
if (_server_error(::mysql_stmt_errno(stmt))) {
int32_t err_code = ::mysql_stmt_errno(stmt);
std::string err_msg(fmt::format("{} errno={} {}",
mysql_error::msg[task->error_code],
err_code, ::mysql_stmt_error(stmt)));
if (err_code == 0) {
SPDLOG_LOGGER_ERROR(log_v2::sql(),
"mysql_connection: errno=0, so we simulate a "
"server error CR_SERVER_LOST");
err_code = CR_SERVER_LOST;
} else {
SPDLOG_LOGGER_ERROR(log_v2::sql(),
"connection fail to execute statement {:p}: {}",
static_cast<const void*>(this), err_msg);
}
if (_server_error(err_code)) {
set_error_message(err_msg);
break;
}
if (mysql_stmt_errno(stmt) != 1213 &&
mysql_stmt_errno(stmt) != 1205) // Dead Lock error
if (err_code != ER_LOCK_DEADLOCK &&
err_code != ER_LOCK_WAIT_TIMEOUT) // Dead Lock error
attempts = MAX_ATTEMPTS;

if (mysql_commit(_conn)) {
Expand Down
6 changes: 5 additions & 1 deletion broker/core/sql/src/mysql_multi_insert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,11 @@ void bulk_or_multi::execute(mysql& connexion,
my_error::code ec,
int thread_id) {
if (_bulk_stmt) {
if (!_bulk_bind->empty()) {
/* If the database connection is lost, we can have this issue */
if (!_bulk_bind) {
_bulk_bind = _bulk_stmt->create_bind();
_bulk_bind->reserve(_bulk_row);
} else if (!_bulk_bind->empty()) {
_bulk_stmt->set_bind(std::move(_bulk_bind));
connexion.run_statement(*_bulk_stmt, ec, thread_id);
_bulk_bind = _bulk_stmt->create_bind();
Expand Down
141 changes: 141 additions & 0 deletions tests/broker-engine/services-and-bulk-stmt.robot
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ EBBPS1
IF "${output}" == "((0,),)" BREAK
END
Should Be Equal As Strings ${output} ((0,),)
Disconnect From Database

FOR ${i} IN RANGE ${1000}
Ctn Process Service Check Result host_1 service_${i+1} 2 warning${i}
Expand Down Expand Up @@ -100,6 +101,7 @@ EBBPS1
IF "${output}" == "((0,),)" BREAK
END
Should Be Equal As Strings ${output} ((0,),)
Disconnect From Database

EBBPS2
[Documentation] 1000 service check results are sent to the poller. The test is done with the unified_sql stream, no service status is lost, we find the 1000 results in the database: table services.
Expand Down Expand Up @@ -146,6 +148,7 @@ EBBPS2
IF "${output}" == "((0,),)" BREAK
END
Should Be Equal As Strings ${output} ((0,),)
Disconnect From Database

FOR ${i} IN RANGE ${1000}
Ctn Process Service Check Result host_1 service_${i+1} 2 critical${i}
Expand Down Expand Up @@ -182,6 +185,7 @@ EBBPS2
IF "${output}" == "((0,),)" BREAK
END
Should Be Equal As Strings ${output} ((0,),)
Disconnect From Database

EBMSSM
[Documentation] 1000 services are configured with 100 metrics each. The rrd output is removed from the broker configuration. GetSqlManagerStats is called to measure writes into data_bin.
Expand Down Expand Up @@ -228,6 +232,7 @@ EBMSSM
Sleep 1s
END
Should Be True ${output[0][0]} >= 100000
Disconnect From Database

EBPS2
[Documentation] 1000 services are configured with 20 metrics each. The rrd output is removed from the broker configuration to avoid to write too many rrd files. While metrics are written in bulk, the database is stopped. This must not crash broker.
Expand Down Expand Up @@ -390,6 +395,142 @@ metric_mapping
${grep_res} Grep File /tmp/test.log name: metric1 corresponds to metric id
Should Not Be Empty ${grep_res} metric name "metric1" not found

EBMSSMDBD
[Documentation] 1000 services are configured with 100 metrics each.
... The rrd output is removed from the broker configuration.
... While metrics are written in the database, we stop the database and then restart it.
... Broker must recover its connection to the database and continue to write metrics.
[Tags] broker engine unified_sql MON-153320
Ctn Clear Metrics
Ctn Config Engine ${1} ${1} ${1000}
# All the services are made passive so that only the check results injected below are processed during the test.
Ctn Set Services Passive ${0} service_.*
Ctn Config Broker central
Ctn Config Broker rrd
Ctn Config Broker module ${1}
Ctn Config BBDO3 1
Ctn Broker Config Log central core error
Ctn Broker Config Log central tcp error
Ctn Broker Config Log central sql debug
Ctn Config Broker Sql Output central unified_sql
Ctn Config Broker Remove Rrd Output central
Ctn Clear Retention
${start} Get Current Date
Ctn Start Broker
Ctn Start Engine

Ctn Wait For Engine To Be Ready ${start} 1

${start} Ctn Get Round Current Date
# Inject one passive check result carrying 100 metrics for each of the 1000 services.
Log To Console Many service checks with 100 metrics each are processed.
FOR ${i} IN RANGE ${1000}
Ctn Process Service Check Result With Metrics host_1 service_${i+1} 1 warning${i} 100
END

Log To Console We wait for at least one metric to be written in the database.
# Poll the database (at most 500 tries, one per second) until at least one metric is attached to a service whose last_check is not older than ${start}.
Connect To Database pymysql ${DBName} ${DBUser} ${DBPass} ${DBHost} ${DBPort}
FOR ${i} IN RANGE ${500}
${output} Query
... SELECT COUNT(s.last_check) FROM metrics m LEFT JOIN index_data i ON m.index_id = i.id LEFT JOIN services s ON s.host_id = i.host_id AND s.service_id = i.service_id WHERE metric_name LIKE "metric_%%" AND s.last_check >= ${start}
IF ${output[0][0]} >= 1 BREAK
Sleep 1s
END
Disconnect From Database

Log To Console Let's start some database manipulation...
${start} Get Current Date

# The database is stopped for 10s and then restarted, three times in a row.
# After each restart the central log is searched for the data_bin insertion failure message (presumably from ${start}, with a 10s timeout - confirm against Ctn Find In Log With Timeout).
# NOTE(review): the search result is only printed on the console, it is never asserted, and nothing after the loop verifies that metrics are written again although the documentation above announces it.
FOR ${i} IN RANGE ${3}
Ctn Stop Mysql
Sleep 10s
Ctn Start Mysql
${content} Create List could not insert data in data_bin
${result} Ctn Find In Log With Timeout ${centralLog} ${start} ${content} 10
Log To Console ${result}
END

EBMSSMPART
[Documentation] 1000 services are configured with 100 metrics each.
... The rrd output is removed from the broker configuration.
... The data_bin table is configured with two partitions p1 and p2 such
... that p1 contains old data and p2 contains current data.
... While metrics are written in the database, we remove the p2 partition.
... Once the p2 partition is recreated, broker must recover its connection
... to the database and continue to write metrics.
... To check that last point, we force a last service check and we check
... that its metrics are written in the database.
[Tags] broker engine unified_sql MON-153320
Ctn Clear Metrics
Ctn Config Engine ${1} ${1} ${1000}
# All the services are made passive so that only the check results injected below are processed during the test.
Ctn Set Services Passive ${0} service_.*
Ctn Config Broker central
Ctn Config Broker rrd
Ctn Config Broker module ${1}
Ctn Config BBDO3 1
Ctn Broker Config Log central core error
Ctn Broker Config Log central tcp error
Ctn Broker Config Log central sql trace
Ctn Config Broker Sql Output central unified_sql
Ctn Config Broker Remove Rrd Output central
Ctn Clear Retention

# data_bin is recreated with its two partitions before broker is started.
Ctn Prepare Partitions For Data Bin
${start} Get Current Date
Ctn Start Broker
Ctn Start Engine

Ctn Wait For Engine To Be Ready ${start} 1

${start} Ctn Get Round Current Date
# Inject one passive check result carrying 100 metrics for each of the 1000 services.
Log To Console Many service checks with 100 metrics each are processed.
FOR ${i} IN RANGE ${1000}
Ctn Process Service Check Result With Metrics host_1 service_${i+1} 1 warning${i} 100
END

Log To Console We wait for at least one metric to be written in the database.
# Poll the database (at most 500 tries, one per second) until at least one metric is attached to a service whose last_check is not older than ${start}.
Connect To Database pymysql ${DBName} ${DBUser} ${DBPass} ${DBHost} ${DBPort}
FOR ${i} IN RANGE ${500}
${output} Query
... SELECT COUNT(s.last_check) FROM metrics m LEFT JOIN index_data i ON m.index_id = i.id LEFT JOIN services s ON s.host_id = i.host_id AND s.service_id = i.service_id WHERE metric_name LIKE "metric_%%" AND s.last_check >= ${start}
IF ${output[0][0]} >= 1 BREAK
Sleep 1s
END
Disconnect From Database

Log To Console Let's start some database manipulation...
Ctn Remove P2 From Data Bin
${start} Get Current Date

# Up to 6 searches of the central log for a message containing "errno=", stopping at the first hit.
# NOTE(review): the outcome is not asserted; the test goes on even if the message is never found.
${content} Create List errno=
FOR ${i} IN RANGE ${6}
${result} Ctn Find In Log With Timeout ${centralLog} ${start} ${content} 10
IF ${result} BREAK
END

Log To Console Let's recreate the p2 partition...
Ctn Add P2 To Data Bin

${start} Ctn Get Round Current Date
Ctn Process Service Check Result With Metrics host_1 service_1 0 Last Output OK 100

Log To Console Let's wait for the last service check to be in the database...
# Poll data_bin (at most 120 tries, one per second) until it holds at least 100 rows whose ctime is not older than ${start} - 10; the final assertion fails the test otherwise.
Connect To Database pymysql ${DBName} ${DBUser} ${DBPass} ${DBHost} ${DBPort}
FOR ${i} IN RANGE ${120}
${output} Query SELECT count(*) FROM data_bin WHERE ctime >= ${start} - 10
Log To Console ${output}
IF ${output[0][0]} >= 100 BREAK
Sleep 1s
END
Log To Console ${output}
Should Be True ${output[0][0]} >= 100
Disconnect From Database


*** Keywords ***
Ctn Test Clean
Ctn Stop Engine
Expand Down
97 changes: 97 additions & 0 deletions tests/resources/Broker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2734,3 +2734,100 @@ def ctn_broker_get_ba(port: int, ba_id: int, output_file: str, timeout=TIMEOUT):
except:
logger.console("gRPC server not ready")
return res


def ctn_prepare_partitions_for_data_bin():
    """
    Drop the data_bin table of the DB_NAME_STORAGE database and recreate it
    partitioned by range on ctime.

    Two partitions are declared:
      * p1 accepts rows whose ctime is lower than now - 60;
      * p2 accepts rows whose ctime is lower than now + 3600.
    """
    db = pymysql.connect(host=DB_HOST,
                         user=DB_USER,
                         password=DB_PASS,
                         database=DB_NAME_STORAGE,
                         charset='utf8mb4',
                         cursorclass=pymysql.cursors.DictCursor)

    # Both partition bounds are derived from one single timestamp.
    ts = int(time.time())
    p1_bound = ts - 60
    p2_bound = ts + 3600
    create_stmt = f"""CREATE TABLE `data_bin` (
`id_metric` int(11) DEFAULT NULL,
`ctime` int(11) DEFAULT NULL,
`value` float DEFAULT NULL,
`status` enum('0','1','2','3','4') DEFAULT NULL,
KEY `index_metric` (`id_metric`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
PARTITION BY RANGE (`ctime`)
(PARTITION `p1` VALUES LESS THAN ({p1_bound}) ENGINE = InnoDB,
PARTITION `p2` VALUES LESS THAN ({p2_bound}) ENGINE = InnoDB)"""

    with db, db.cursor() as cur:
        cur.execute("DROP TABLE IF EXISTS data_bin")
        cur.execute(create_stmt)
        db.commit()


def ctn_remove_p2_from_data_bin():
    """
    Drop the partition named p2 from the data_bin table of the
    DB_NAME_STORAGE database.
    """
    db = pymysql.connect(host=DB_HOST,
                         user=DB_USER,
                         password=DB_PASS,
                         database=DB_NAME_STORAGE,
                         charset='utf8mb4',
                         cursorclass=pymysql.cursors.DictCursor)

    with db, db.cursor() as cur:
        cur.execute("ALTER TABLE data_bin DROP PARTITION p2")
        db.commit()


def ctn_add_p2_to_data_bin():
    """
    Add the partition p2 to the data_bin table of the DB_NAME_STORAGE
    database. The new partition accepts rows whose ctime is lower than
    now + 3600.
    """
    db = pymysql.connect(host=DB_HOST,
                         user=DB_USER,
                         password=DB_PASS,
                         database=DB_NAME_STORAGE,
                         charset='utf8mb4',
                         cursorclass=pymysql.cursors.DictCursor)

    # Upper bound of p2: one hour after the current time.
    p2_bound = int(time.time()) + 3600
    alter_stmt = f"ALTER TABLE data_bin ADD PARTITION (PARTITION p2 VALUES LESS THAN ({p2_bound}))"

    with db, db.cursor() as cur:
        cur.execute(alter_stmt)
        db.commit()


def ctn_init_data_bin_without_partition():
    """
    Recreate the data_bin table without partition.

    The table is dropped if it exists in the DB_NAME_STORAGE database and
    created again with the columns id_metric, ctime, value and status, and
    an index on id_metric.
    """
    connection = pymysql.connect(host=DB_HOST,
                                 user=DB_USER,
                                 password=DB_PASS,
                                 database=DB_NAME_STORAGE,
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)

    # No timestamp is needed here: unlike the partitioned variant, the
    # statement has no partition bound to fill in, so it is a plain string.
    sql = """CREATE TABLE `data_bin` (
`id_metric` int(11) DEFAULT NULL,
`ctime` int(11) DEFAULT NULL,
`value` float DEFAULT NULL,
`status` enum('0','1','2','3','4') DEFAULT NULL,
KEY `index_metric` (`id_metric`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1"""

    with connection:
        with connection.cursor() as cursor:
            cursor.execute("DROP TABLE IF EXISTS data_bin")
            cursor.execute(sql)
            connection.commit()
15 changes: 8 additions & 7 deletions tests/resources/resources.robot
Original file line number Diff line number Diff line change
Expand Up @@ -363,13 +363,14 @@ Ctn Dump Ba On Error

Ctn Process Service Result Hard
[Arguments] ${host} ${svc} ${state} ${output}
Repeat Keyword
... 3 times
... Ctn Process Service Check Result
... ${host}
... ${svc}
... ${state}
... ${output}
FOR ${idx} IN RANGE 3
Ctn Process Service Check Result
... ${host}
... ${svc}
... ${state}
... ${output}
Sleep 1s
END

Ctn Wait For Engine To Be Ready
[Arguments] ${start} ${nbEngine}=1
Expand Down

0 comments on commit 62b6b51

Please sign in to comment.