Skip to content

Commit

Permalink
Merge remote-tracking branch 'refs/remotes/origin/master' into 3.0-pa…
Browse files Browse the repository at this point in the history
…ckage
  • Loading branch information
wayyoungboy committed Dec 18, 2024
2 parents ba72c0c + c7bfeaa commit 238f5cd
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 8 deletions.
Binary file removed .DS_Store
Binary file not shown.
Empty file modified dev_helper.sh
100644 → 100755
Empty file.
108 changes: 108 additions & 0 deletions plugins/gather/tasks/observer/topsql.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Gather scene "observer.topsql": a set of Top-N queries over the SQL audit view,
# each restricted to requests from the last 10 minutes (request_time is compared
# against time_to_usec(now()) in microseconds, hence 10*60*1000000).
# Two task variants are provided: OceanBase 4.x reads gv$ob_sql_audit, while
# OceanBase 3.x reads gv$sql_audit.
# NOTE(review): the 4.x and 3.x variants do not filter on is_inner_sql in the
# same steps; confirm whether that difference is intentional.
info_en: "[topsql info]"
info_cn: "[集群 topsql]"
command: obdiag gather scene run --scene=observer.topsql

task:
  - version: "[4.0.0.0, *]"
    steps:
      # Top 20 SQL_IDs ranked by total elapsed time (avg(ELAPSED_TIME) * count).
      - type: sql
        sql:
          "select /*+read_consistency(weak),query_timeout(100000000)*/ tenant_name,SQL_ID,substr(query_sql, 1, 100) as query_sql,count(1),avg(ELAPSED_TIME),avg(EXECUTE_TIME),avg(QUEUE_TIME),avg(AFFECTED_ROWS),avg(GET_PLAN_TIME)
          from oceanbase.gv$ob_sql_audit
          where time_to_usec(now(6))-request_time < 10*60*1000000
          group by SQL_ID order by avg(ELAPSED_TIME)*count(1) desc limit 20;"
        global: true
      # Top 20 SQL_IDs ranked by request count; the QPS column is the raw
      # count(*) over the window, not a per-second rate.
      - type: sql
        tittle: Top-N SQL queries ranked by request count in the last 10 minutes
        sql:
          "select /*+read_consistency(weak),query_timeout(100000000)*/ tenant_name,SQL_ID, substr(query_sql, 1, 100) as query_sql,count(*) as QPS, avg(t1.elapsed_time) RT
          from oceanbase.gv$ob_sql_audit t1
          where IS_EXECUTOR_RPC = 0
          and request_time > (time_to_usec(now()) - 10*60*1000000)
          and request_time < time_to_usec(now())
          group by t1.sql_id order by QPS desc limit 20;"
        global: true
      # Top 20 SQL_IDs ranked by cpu_time, computed as sum(elapsed_time - queue_time).
      - type: sql
        tittle: The SQL that consumes the most CPU among all SQLs in the last 10 minutes
        sql:
          "select /*+read_consistency(weak),query_timeout(100000000)*/ tenant_name,sql_id, substr(query_sql, 1, 100) as query_sql,
          sum(elapsed_time - queue_time) as cpu_time, count(*) cnt,
          avg(get_plan_time), avg(execute_time)
          from oceanbase.gv$ob_sql_audit
          where request_time > (time_to_usec(now()) - 10*60*1000000)
          and request_time < time_to_usec(now())
          group by sql_id order by cpu_time desc limit 20;"
        global: true
      # Request count per plan_type for non-inner, non-executor-RPC requests.
      - type: sql
        tittle: Check whether there have been a large number of unreasonable remote execution requests for SQL executions in the past 10 minutes
        sql:
          "select /*+read_consistency(weak),query_timeout(100000000)*/ count(*), plan_type
          from oceanbase.gv$ob_sql_audit
          where IS_EXECUTOR_RPC = 0
          and is_inner_sql = 0
          and request_time > (time_to_usec(now()) -10*60*1000000)
          and request_time < time_to_usec(now())
          group by plan_type limit 20;"
        global: true
      # Slowest 20 non-inner requests flagged with table_scan = 1.
      - type: sql
        tittle: SQL for querying a full table scan
        sql:
          "select /*+read_consistency(weak),query_timeout(100000000)*/ tenant_name, SQL_ID, substr(query_sql, 1, 100) as query_sql, elapsed_time
          from oceanbase.gv$ob_sql_audit
          where table_scan = 1
          and request_time > (time_to_usec(now()) - 10*60*1000000)
          and request_time < time_to_usec(now())
          and is_inner_sql = 0
          order by elapsed_time desc limit 20;"
        global: true
  - version: "[3.0.0.0, 4.0.0.0]"
    steps:
      # Top 20 SQL_IDs ranked by total elapsed time (avg(ELAPSED_TIME) * count).
      - type: sql
        sql:
          "select /*+read_consistency(weak),query_timeout(100000000)*/ tenant_name,SQL_ID,substr(query_sql, 1, 100) as query_sql,count(1),avg(ELAPSED_TIME),avg(EXECUTE_TIME),avg(QUEUE_TIME),avg(AFFECTED_ROWS),avg(GET_PLAN_TIME)
          from oceanbase.gv$sql_audit
          where time_to_usec(now(6))-request_time < 10*60*1000000
          and is_inner_sql = 0
          group by SQL_ID order by avg(ELAPSED_TIME)*count(1) desc limit 20 ;"
        global: true
      # Top 20 SQL_IDs ranked by request count; the QPS column is the raw
      # count(*) over the window, not a per-second rate.
      - type: sql
        sql:
          "select /*+read_consistency(weak),query_timeout(100000000)*/ tenant_name, SQL_ID,substr(query_sql, 1, 100) as query_sql, count(*) as QPS, avg(t1.elapsed_time) RT
          from oceanbase.gv$sql_audit t1
          where IS_EXECUTOR_RPC = 0
          and is_inner_sql = 0
          and request_time > (time_to_usec(now()) - 10*60*1000000)
          and request_time < time_to_usec(now())
          group by t1.sql_id order by QPS desc limit 20;"
        global: true
      # Top 20 SQL_IDs ranked by cpu_time, computed as sum(elapsed_time - queue_time).
      - type: sql
        sql:
          "select /*+read_consistency(weak),query_timeout(100000000)*/ tenant_name,sql_id, substr(query_sql, 1, 100) as query_sql,
          sum(elapsed_time - queue_time) as cpu_time, count(*) cnt,
          avg(get_plan_time), avg(execute_time)
          from oceanbase.gv$sql_audit
          where request_time > (time_to_usec(now()) - 10*60*1000000)
          and request_time < time_to_usec(now())
          and is_inner_sql = 0
          group by sql_id order by cpu_time desc limit 20;"
        global: true
      # Request count per plan_type for non-executor-RPC requests.
      # The title states the window literally: an unquoted " #{...}" would start
      # a YAML comment and truncate the title, and the SQL hard-codes 10 minutes.
      - type: sql
        tittle: Check whether there have been a large number of unreasonable remote execution requests for SQL executions in the past 10 minutes
        sql:
          "select /*+read_consistency(weak),query_timeout(100000000)*/ count(*), plan_type
          from oceanbase.gv$sql_audit
          where IS_EXECUTOR_RPC = 0
          and request_time > (time_to_usec(now()) - 10*60*1000000)
          and request_time < time_to_usec(now())
          group by plan_type limit 20;"
        global: true
      # Slowest 20 non-inner requests flagged with table_scan = 1.
      # The window is 10 minutes in microseconds, matching every other step.
      - type: sql
        sql:
          "select /*+read_consistency(weak),query_timeout(100000000)*/ tenant_name, SQL_ID, substr(query_sql, 1, 100) as query_sql
          from oceanbase.gv$sql_audit
          where table_scan = 1
          and request_time > (time_to_usec(now()) - 10*60*1000000)
          and request_time < time_to_usec(now())
          and is_inner_sql = 0
          order by elapsed_time desc limit 20;"
        global: true
20 changes: 20 additions & 0 deletions src/common/ob_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,26 @@ def execute_sql_pretty(self, sql):
cursor.close()
return ret

def execute_display_cursor(self, business_sql):
    """Execute ``business_sql`` and return the output of ``dbms_xplan.display_cursor``.

    The three statements (isolation level, the business SQL, then the
    ``display_cursor`` query) are issued on the same cursor, one after
    another, so the plan query follows the business SQL on the same
    connection.

    Args:
        business_sql: SQL text to execute before asking for its plan.

    Returns:
        The object produced by ``from_db_cursor`` for the plan query, with
        its ``align`` attribute set to ``'l'``.
        # NOTE(review): presumably a prettytable table - confirm against the
        # file's imports.

    Raises:
        Exception: wraps any failure; the original error is kept as
            ``__cause__`` so the real traceback is not lost.
    """
    if self.conn is None:
        self._connect_db()
    else:
        # Re-establish the connection if it has dropped since the last call.
        self.conn.ping(reconnect=True)
    cursor = self.conn.cursor()
    try:
        cursor.execute("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")
        cursor.execute(business_sql)

        cursor.execute("select dbms_xplan.display_cursor(0, 'all')")
        plan_result = from_db_cursor(cursor)
        plan_result.align = 'l'
        return plan_result
    except Exception as e:
        raise Exception("execute display cursor error: {0}".format(e)) from e
    finally:
        # Single close point: runs on both the success and the failure path.
        cursor.close()

def callproc(self, procname, args=()):
if self.conn is None:
self._connect_db()
Expand Down
26 changes: 19 additions & 7 deletions src/handler/gather/gather_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,13 +142,14 @@ def __handle_from_node(self, node, local_stored_path):
resp["error"] = "can't find observer"
return resp
for pid_observer in pid_observer_list:
if self.scope == "sample":
self.__gather_perf_sample(ssh_client, remote_dir_full_path, pid_observer)
elif self.scope == "flame":
self.__gather_perf_flame(ssh_client, remote_dir_full_path, pid_observer)
else:
self.__gather_perf_sample(ssh_client, remote_dir_full_path, pid_observer)
self.__gather_perf_flame(ssh_client, remote_dir_full_path, pid_observer)
if self.__perf_checker(ssh_client):
if self.scope == "sample":
self.__gather_perf_sample(ssh_client, remote_dir_full_path, pid_observer)
elif self.scope == "flame":
self.__gather_perf_flame(ssh_client, remote_dir_full_path, pid_observer)
else:
self.__gather_perf_sample(ssh_client, remote_dir_full_path, pid_observer)
self.__gather_perf_flame(ssh_client, remote_dir_full_path, pid_observer)
self.__gather_top(ssh_client, remote_dir_full_path, pid_observer)

zip_dir(ssh_client, "/tmp", remote_dir_name, self.stdio)
Expand Down Expand Up @@ -177,6 +178,17 @@ def __gather_perf_sample(self, ssh_client, gather_path, pid_observer):
except:
self.stdio.error("generate perf sample data on server [{0}] failed".format(ssh_client.get_name()))

def __perf_checker(self, ssh_client):
    """Tell whether ``perf`` can be found on the server behind ``ssh_client``.

    Runs ``command -v perf`` through ``ssh_client.exec_cmd`` and treats any
    truthy result as "installed".

    Args:
        ssh_client: client exposing ``exec_cmd`` and ``get_name``.

    Returns:
        True when the command produced output (logged at verbose level),
        False otherwise (logged as an error so the user knows perf
        gathering is skipped).
    """
    perf_location = ssh_client.exec_cmd("command -v perf")

    if not perf_location:
        self.stdio.error("perf is not installed on server [{0}]. gather perf information will be skipped. Please install perf manually. ".format(ssh_client.get_name()))
        return False

    self.stdio.verbose("perf is installed at [{0}] on server [{1}]".format(perf_location, ssh_client.get_name()))
    return True

def __gather_perf_flame(self, ssh_client, gather_path, pid_observer):
try:
self.stdio.start_loading('gather perf flame')
Expand Down
28 changes: 27 additions & 1 deletion src/handler/gather/gather_plan_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def __init__(self, context, gather_pack_dir='./', is_scene=False):
self.sql_audit_name = "gv$sql_audit"
self.plan_explain_name = "gv$plan_cache_plan_explain"
self.is_scene = is_scene
self.ob_version = "4.2.5.0"
if self.context.get_variable("gather_timestamp", None):
self.gather_timestamp = self.context.get_variable("gather_timestamp")
else:
Expand Down Expand Up @@ -165,6 +166,8 @@ def handle_plan_monitor_from_ob(cluster_name):
# 输出plan cache的信息
self.stdio.verbose("[sql plan monitor report task] report plan cache")
self.report_plan_cache(plan_explain_sql)
# dbms_xplan.display_cursor
self.report_display_cursor_obversion4(sql)
# 输出表结构的信息
self.stdio.verbose("[sql plan monitor report task] report table schema")
self.report_schema(user_sql, tenant_name)
Expand Down Expand Up @@ -216,7 +219,7 @@ def handle_plan_monitor_from_ob(cluster_name):
if getattr(sys, 'frozen', False):
absPath = os.path.dirname(sys.executable)
else:
absPath = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
absPath = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
cs_resources_path = os.path.join(absPath, "resources")
self.stdio.verbose("[cs resource path] : {0}".format(cs_resources_path))
target_resources_path = os.path.join(pack_dir_this_command, "resources")
Expand Down Expand Up @@ -667,6 +670,7 @@ def tenant_mode_detected(self):

if matched_version:
version = matched_version.group(2)
self.ob_version = version
major_version = int(version.split('.')[0])

self.sql_audit_name = "gv$ob_sql_audit" if major_version >= 4 else "gv$sql_audit"
Expand Down Expand Up @@ -998,3 +1002,25 @@ def report_db_time_display_obversion4(self, sql_plan_monitor_db_time):
self.stdio.exception("DB Time display> %s" % sql_plan_monitor_db_time)
self.stdio.exception(repr(e))
pass

def __is_select_statement(self, sql):
    """Return True when ``sql`` begins with the keyword SELECT.

    Surrounding whitespace is ignored and the comparison is
    case-insensitive. Only the leading keyword is inspected; the statement
    is not parsed.

    Args:
        sql: SQL text to classify. ``None`` or any non-string value is
            treated as "not a SELECT" instead of raising, because the caller
            invokes this check outside its own try/except.

    Returns:
        bool: True if the stripped, upper-cased text starts with ``SELECT``.
    """
    if not isinstance(sql, str):
        return False
    return sql.strip().upper().startswith('SELECT')

def report_display_cursor_obversion4(self, display_cursor_sql):
    """Append a ``dbms_xplan.display_cursor`` section to the report.

    Nothing is done for statements that are not SELECTs. For SELECTs the
    statement is executed through ``self.db_connector.execute_display_cursor``
    and both the replayed commands and the resulting plan are written with
    ``self.report_pre``. The section is skipped (with a verbose log line)
    when ``self.ob_version`` is reported as lower than 4.2.5.0.

    Failures are logged through ``self.stdio.exception`` and never
    propagated, so a failing plan query does not abort the rest of the
    report.

    Args:
        display_cursor_sql: SQL text whose plan should be displayed.
    """
    if not self.__is_select_statement(display_cursor_sql):
        return
    try:
        # NOTE(review): compare_versions_lower presumably returns True when the
        # first version is strictly lower than the second - confirm in StringUtils.
        if StringUtils.compare_versions_lower(self.ob_version, "4.2.5.0"):
            # Report the same attribute the comparison uses (self.ob_version).
            self.stdio.verbose("display_cursor report requires the OB version to be greater than or equal to 4.2.5.0. Your version: {0} does not meet this requirement.".format(self.ob_version))
            return
        plan_result = self.db_connector.execute_display_cursor(display_cursor_sql)
        self.stdio.verbose("execute SQL: %s", display_cursor_sql)
        # Show the exact command sequence so the plan can be reproduced by hand.
        step = "obclient> SET TRANSACTION ISOLATION LEVEL READ COMMITTED;\n{0}\nselect dbms_xplan.display_cursor(0, 'all');".format(display_cursor_sql)
        self.report_pre(step)
        self.report_pre(plan_result)
        self.stdio.verbose("display_cursor report complete")
    except Exception as e:
        # Best effort: log and continue with the remaining report sections.
        self.stdio.exception("display_cursor report> %s" % display_cursor_sql)
        self.stdio.exception(repr(e))

0 comments on commit 238f5cd

Please sign in to comment.