From f1a20e34a54658c8eb5a8893ddf74129f3b38ea1 Mon Sep 17 00:00:00 2001
From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com>
Date: Mon, 16 Dec 2024 20:49:53 +0800
Subject: [PATCH 1/5] Add a check to verify that perf is installed on the
 system when gathering perf data (#633)

* Rename Dockerfile to DockerFile

* fix

* fix

* fixed: issue #620

* fixed: issue #620

* fixed: issue #620
---
 dev_helper.sh                     |  0
 src/handler/gather/gather_perf.py | 26 +++++++++++++++++++-------
 2 files changed, 19 insertions(+), 7 deletions(-)
 mode change 100644 => 100755 dev_helper.sh

diff --git a/dev_helper.sh b/dev_helper.sh
old mode 100644
new mode 100755
diff --git a/src/handler/gather/gather_perf.py b/src/handler/gather/gather_perf.py
index d3da96dc..6418d74f 100644
--- a/src/handler/gather/gather_perf.py
+++ b/src/handler/gather/gather_perf.py
@@ -142,13 +142,14 @@ def __handle_from_node(self, node, local_stored_path):
             resp["error"] = "can't find observer"
             return resp
         for pid_observer in pid_observer_list:
-            if self.scope == "sample":
-                self.__gather_perf_sample(ssh_client, remote_dir_full_path, pid_observer)
-            elif self.scope == "flame":
-                self.__gather_perf_flame(ssh_client, remote_dir_full_path, pid_observer)
-            else:
-                self.__gather_perf_sample(ssh_client, remote_dir_full_path, pid_observer)
-                self.__gather_perf_flame(ssh_client, remote_dir_full_path, pid_observer)
+            if self.__perf_checker(ssh_client):
+                if self.scope == "sample":
+                    self.__gather_perf_sample(ssh_client, remote_dir_full_path, pid_observer)
+                elif self.scope == "flame":
+                    self.__gather_perf_flame(ssh_client, remote_dir_full_path, pid_observer)
+                else:
+                    self.__gather_perf_sample(ssh_client, remote_dir_full_path, pid_observer)
+                    self.__gather_perf_flame(ssh_client, remote_dir_full_path, pid_observer)
             self.__gather_top(ssh_client, remote_dir_full_path, pid_observer)
         zip_dir(ssh_client, "/tmp", remote_dir_name, self.stdio)

@@ -177,6 +178,17 @@ def __gather_perf_sample(self, ssh_client, gather_path, pid_observer):
         except:
             self.stdio.error("generate perf sample data on server [{0}] failed".format(ssh_client.get_name()))

+    def __perf_checker(self, ssh_client):
+        cmd = "command -v perf1"
+        result = ssh_client.exec_cmd(cmd)
+
+        if result:
+            self.stdio.verbose("perf is installed at [{0}] on server [{1}]".format(result, ssh_client.get_name()))
+            return True
+        else:
+            self.stdio.error("perf is not installed on server [{0}]; gathering perf information will be skipped. Please install perf manually.".format(ssh_client.get_name()))
+            return False
+
     def __gather_perf_flame(self, ssh_client, gather_path, pid_observer):
         try:
             self.stdio.start_loading('gather perf flame')
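
[editor's note] The check above relies on "command -v" printing the resolved path (and exiting 0) only when the binary exists, so a non-empty result means perf is present; note that "perf1" here is a typo that the next patch corrects. A minimal standalone sketch of the same probe, run locally through subprocess instead of obdiag's ssh_client (tool_available is a hypothetical helper, not part of the repository):

    import subprocess

    def tool_available(name):
        # "command -v" prints the resolved path and exits 0 when the tool
        # exists; it prints nothing and exits non-zero when it does not.
        result = subprocess.run(["bash", "-c", "command -v " + name],
                                capture_output=True, text=True)
        return result.returncode == 0 and result.stdout.strip() != ""

    print("perf available:", tool_available("perf"))
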
".format(ssh_client.get_name())) + return False + def __gather_perf_flame(self, ssh_client, gather_path, pid_observer): try: self.stdio.start_loading('gather perf flame') From c0b2bed173e536c643289cf7ae5456042fde26a7 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Wed, 18 Dec 2024 10:28:50 +0800 Subject: [PATCH 2/5] Update gather_perf.py (#634) --- src/handler/gather/gather_perf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/handler/gather/gather_perf.py b/src/handler/gather/gather_perf.py index 6418d74f..394d71ac 100644 --- a/src/handler/gather/gather_perf.py +++ b/src/handler/gather/gather_perf.py @@ -179,7 +179,7 @@ def __gather_perf_sample(self, ssh_client, gather_path, pid_observer): self.stdio.error("generate perf sample data on server [{0}] failed".format(ssh_client.get_name())) def __perf_checker(self, ssh_client): - cmd = "command -v perf1" + cmd = "command -v perf" result = ssh_client.exec_cmd(cmd) if result: From db77ec9d889f843cbbd38461f1c074bbae2170a1 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Wed, 18 Dec 2024 10:29:48 +0800 Subject: [PATCH 3/5] add top sql gather scene (#636) * Rename Dockerfile to DockerFile * add top sql scene --- plugins/gather/tasks/observer/topsql.yaml | 108 ++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 plugins/gather/tasks/observer/topsql.yaml diff --git a/plugins/gather/tasks/observer/topsql.yaml b/plugins/gather/tasks/observer/topsql.yaml new file mode 100644 index 00000000..afda20b8 --- /dev/null +++ b/plugins/gather/tasks/observer/topsql.yaml @@ -0,0 +1,108 @@ +info_en: "[topsql info]" +info_cn: "[集群 topsql]" +command: obdiag gather scene run --scene=observer.topsql + +task: + - version: "[4.0.0.0, *]" + steps: + - type: sql + sql: + "select /*+read_consistency(weak),query_timeout(100000000)*/ tenant_name,SQL_ID,substr(query_sql, 1, 100) as query_sql,count(1),avg(ELAPSED_TIME),avg(EXECUTE_TIME),avg(QUEUE_TIME),avg(AFFECTED_ROWS),avg(GET_PLAN_TIME) + from oceanbase.gv$ob_sql_audit + where time_to_usec(now(6))-request_time < 10*60*1000000 + group by SQL_ID order by avg(ELAPSED_TIME)*count(1) desc limit 20;" + global: true + - type: sql + tittle: Top-N SQL queries ranked by request count in the last 10 minutes + sql: + "select /*+read_consistency(weak),query_timeout(100000000)*/ tenant_name,SQL_ID, substr(query_sql, 1, 100) as query_sql,count(*) as QPS, avg(t1.elapsed_time) RT + from oceanbase.gv$ob_sql_audit t1 + where IS_EXECUTOR_RPC = 0 + and request_time > (time_to_usec(now()) - 10*60*1000000) + and request_time < time_to_usec(now()) + group by t1.sql_id order by QPS desc limit 20;" + global: true + - type: sql + tittle: The SQL that consumes the most CPU among all SQLs in the last 10 minutes + sql: + "select /*+read_consistency(weak),query_timeout(100000000)*/ tenant_name,sql_id, substr(query_sql, 1, 100) as query_sql, + sum(elapsed_time - queue_time) as cpu_time, count(*) cnt, + avg(get_plan_time), avg(execute_time) + from oceanbase.gv$ob_sql_audit + where request_time > (time_to_usec(now()) - 10*60*1000000) + and request_time < time_to_usec(now()) + group by sql_id order by cpu_time desc limit 20;" + global: true + - type: sql + tittle: Check whether there have been a large number of unreasonable remote execution requests for SQL executions in the past 10 minutes + sql: + "select /*+read_consistency(weak),query_timeout(100000000)*/ count(*), plan_type + from oceanbase.gv$ob_sql_audit + where 
From 84295a6c0170c20f153ddfe38ecf068357c645c3 Mon Sep 17 00:00:00 2001
From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com>
Date: Wed, 18 Dec 2024 10:30:24 +0800
Subject: [PATCH 4/5] SQL Monitor Report: support dbms_xplan.display_cursor
 (#635)

* Rename Dockerfile to DockerFile

* support dbms_xplan.display_cursor

* fix
---
 .DS_Store                                 | Bin 6148 -> 0 bytes
 src/common/ob_connector.py                | 20 ++++++++++++++++
 src/handler/gather/gather_plan_monitor.py | 28 +++++++++++++++++++++-
 3 files changed, 47 insertions(+), 1 deletion(-)
 delete mode 100644 .DS_Store

diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index 5c3c6c2f9724dd4dd277ebc9126dd114ea190606..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHKJ8r`;3?&nz2#_UXMqQyd5RCK$xj;WoiUJKh7}-7bTs>MJKSKhin>#dk1gIxb
zd=m5q(-aYH&)uWQMnqO{L;12{YPN4au|-A{2*()*8IIlX_S*NKlkD38<37lToa9B`
zFZs4XqXJZb3Qz$mKm}%3AWQ6OJ^RT#kP1+N|E_?29}3*CCXRvr>A>JE0I)&W4Rh}$
zfW-p9nm7g`0@I)ZgR0qLXwVTanO76Xz@Up}^PzdOW{0AFJI*hjE?NUQQUNM(t-vyt
z8>|0M@L&4>YZ6yffC}7|0^02MyB1H%+B$ih)!G7ohFi`z+zfN4VDNGb^m2@amE)}^
bMP9Kv_G{u8=yb%L4&={(=|ZCdf33g|P&^fT

diff --git a/src/common/ob_connector.py b/src/common/ob_connector.py
index 02cc9d70..72fd2324 100644
--- a/src/common/ob_connector.py
+++ b/src/common/ob_connector.py
@@ -151,6 +151,26 @@ def execute_sql_pretty(self, sql):
             cursor.close()
         return ret

+    def execute_display_cursor(self, business_sql):
+        if self.conn is None:
+            self._connect_db()
+        else:
+            self.conn.ping(reconnect=True)
+        cursor = self.conn.cursor()
+        try:
+            cursor.execute("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")
+            cursor.execute(business_sql)
+
+            cursor.execute("select dbms_xplan.display_cursor(0, 'all')")
+            plan_result = from_db_cursor(cursor)
+            plan_result.align = 'l'
+            cursor.close()
+            return plan_result
+        except Exception as e:
+            raise Exception("execute display cursor error: {0}".format(e))
+        finally:
+            cursor.close()
+
     def callproc(self, procname, args=()):
         if self.conn is None:
             self._connect_db()
diff --git a/src/handler/gather/gather_plan_monitor.py b/src/handler/gather/gather_plan_monitor.py
index bf3ada90..890d326c 100644
--- a/src/handler/gather/gather_plan_monitor.py
+++ b/src/handler/gather/gather_plan_monitor.py
@@ -54,6 +54,7 @@ def __init__(self, context, gather_pack_dir='./', is_scene=False):
         self.sql_audit_name = "gv$sql_audit"
         self.plan_explain_name = "gv$plan_cache_plan_explain"
         self.is_scene = is_scene
+        self.ob_version = "4.2.5.0"
         if self.context.get_variable("gather_timestamp", None):
             self.gather_timestamp = self.context.get_variable("gather_timestamp")
         else:
@@ -165,6 +166,8 @@ def handle_plan_monitor_from_ob(cluster_name):
                 # output plan cache info
                 self.stdio.verbose("[sql plan monitor report task] report plan cache")
                 self.report_plan_cache(plan_explain_sql)
+                # dbms_xplan.display_cursor
+                self.report_display_cursor_obversion4(sql)
                 # output table schema info
                 self.stdio.verbose("[sql plan monitor report task] report table schema")
                 self.report_schema(user_sql, tenant_name)
@@ -216,7 +219,7 @@ def handle_plan_monitor_from_ob(cluster_name):
             if getattr(sys, 'frozen', False):
                 absPath = os.path.dirname(sys.executable)
             else:
-                absPath = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+                absPath = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
             cs_resources_path = os.path.join(absPath, "resources")
             self.stdio.verbose("[cs resource path] : {0}".format(cs_resources_path))
             target_resources_path = os.path.join(pack_dir_this_command, "resources")
@@ -667,6 +670,7 @@ def tenant_mode_detected(self):
         if matched_version:
             version = matched_version.group(2)
+            self.ob_version = version
             major_version = int(version.split('.')[0])

             self.sql_audit_name = "gv$ob_sql_audit" if major_version >= 4 else "gv$sql_audit"
@@ -998,3 +1002,25 @@ def report_db_time_display_obversion4(self, sql_plan_monitor_db_time):
             self.stdio.exception("DB Time display> %s" % sql_plan_monitor_db_time)
             self.stdio.exception(repr(e))
             pass
+
+    def __is_select_statement(self, sql):
+        stripped_sql = sql.strip().upper()
+        return stripped_sql.startswith('SELECT')
+
+    def report_display_cursor_obversion4(self, display_cursor_sql):
+        if not self.__is_select_statement(display_cursor_sql):
+            return
+        try:
+            if not StringUtils.compare_versions_lower(self.ob_version, "4.2.5.0"):
+                plan_result = self.db_connector.execute_display_cursor(display_cursor_sql)
+                self.stdio.verbose("execute SQL: %s", display_cursor_sql)
+                step = "obclient> SET TRANSACTION ISOLATION LEVEL READ COMMITTED;\n{0}\nselect dbms_xplan.display_cursor(0, 'all');".format(display_cursor_sql)
+                self.report_pre(step)
+                self.report_pre(plan_result)
+                self.stdio.verbose("display_cursor report complete")
+            else:
+                self.stdio.verbose("display_cursor report requires OB version 4.2.5.0 or later; your version {0} does not meet this requirement.".format(self.ob_version))
+        except Exception as e:
+            self.stdio.exception("display_cursor report> %s" % display_cursor_sql)
+            self.stdio.exception(repr(e))
+            pass
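
[editor's note] execute_display_cursor depends on dbms_xplan.display_cursor(0, 'all') rendering the plan of the most recent cursor in the same session, which is why the business SQL and the display call must share one connection. A hedged sketch of the same three-step sequence over a bare pymysql connection (host, port, credentials, and the business SQL are placeholders, not values from the repository):

    import pymysql

    conn = pymysql.connect(host="127.0.0.1", port=2881, user="root@sys",
                           password="", database="oceanbase")
    cursor = conn.cursor()
    try:
        cursor.execute("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")
        cursor.execute("select 1")  # the business SQL under analysis
        # display_cursor(0, 'all') reports the plan of this session's last cursor
        cursor.execute("select dbms_xplan.display_cursor(0, 'all')")
        for row in cursor.fetchall():
            print(row[0])
    finally:
        cursor.close()
        conn.close()
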
From c7bfeaa62d30608fe13ac82ea77f40bc2b8c884c Mon Sep 17 00:00:00 2001
From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com>
Date: Wed, 18 Dec 2024 11:21:40 +0800
Subject: [PATCH 5/5] fix: conf path (#632)

* fix: conf path

* fix: conf path
---
 rpm/oceanbase-diagnostic-tool.spec | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/rpm/oceanbase-diagnostic-tool.spec b/rpm/oceanbase-diagnostic-tool.spec
index c9828b68..a55f87c1 100644
--- a/rpm/oceanbase-diagnostic-tool.spec
+++ b/rpm/oceanbase-diagnostic-tool.spec
@@ -42,11 +42,11 @@ cd $SRC_DIR
 \cp -rf $SRC_DIR/example $BUILD_DIR/SOURCES/example
 \cp -rf $SRC_DIR/resources $BUILD_DIR/SOURCES/
 \cp -rf $SRC_DIR/dependencies/bin $BUILD_DIR/SOURCES/dependencies
-\cp -rf $SRC_DIR/plugins $BUILD_DIR/plugins
+\cp -rf $SRC_DIR/plugins $BUILD_DIR/SOURCES/
 \cp -rf $SRC_DIR/rpm/init.sh $BUILD_DIR/SOURCES/init.sh
 \cp -rf $SRC_DIR/rpm/init_obdiag_cmd.sh $BUILD_DIR/SOURCES/init_obdiag_cmd.sh
 \cp -rf $SRC_DIR/rpm/obdiag_backup.sh $BUILD_DIR/SOURCES/obdiag_backup.sh
-\cp -rf $SRC_DIR/conf $BUILD_DIR/SOURCES/conf
+\cp -rf $SRC_DIR/conf $BUILD_DIR/SOURCES/
 mkdir -p ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/lib/
 mkdir -p ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/dependencies/bin
 find $SRC_DIR -name "obdiag"
@@ -55,11 +55,11 @@ find $SRC_DIR -name "obdiag"
 \cp -rf $BUILD_DIR/SOURCES/resources ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/resources
 \cp -rf $BUILD_DIR/SOURCES/dependencies/bin ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/dependencies
 \cp -rf $BUILD_DIR/SOURCES/example ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/
-\cp -rf $BUILD_DIR/SOURCES/conf ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/
+\cp -rf $BUILD_DIR/SOURCES/conf ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/conf
 \cp -rf $BUILD_DIR/SOURCES/init.sh ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/
 \cp -rf $BUILD_DIR/SOURCES/init_obdiag_cmd.sh ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/
 \cp -rf $BUILD_DIR/SOURCES/obdiag_backup.sh ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/
-\cp -rf $BUILD_DIR/plugins ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/
+\cp -rf $BUILD_DIR/SOURCES/plugins ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/

 %files
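
[editor's note] After this fix, conf and plugins are staged under $BUILD_DIR/SOURCES like every other payload and land beneath /usr/local/oceanbase-diagnostic-tool at install time. A quick sketch for sanity-checking the installed layout on a host where the RPM is present (the directory list is taken from the spec above; this script is an editor's illustration, not part of the package):

    import os

    INSTALL_ROOT = "/usr/local/oceanbase-diagnostic-tool"
    for name in ("conf", "plugins", "resources", "example", "dependencies"):
        path = os.path.join(INSTALL_ROOT, name)
        print("{0}: {1}".format(path, "ok" if os.path.isdir(path) else "missing"))
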