diff --git a/.github/workflows/build_package.yml b/.github/workflows/build_package.yml index 7bb8ab79..41fff99c 100644 --- a/.github/workflows/build_package.yml +++ b/.github/workflows/build_package.yml @@ -83,10 +83,13 @@ jobs: sudo apt-get install -y alien - name: Convert RPM to DEB - run: sudo alien -k --scripts oceanbase-diagnostic-tool-*.rpm + run: | + sudo alien -k --scripts oceanbase-diagnostic-tool-*.rpm + pwd - name: Upload DEB Artifact uses: actions/upload-artifact@v3 with: name: obdiag-deb-package - path: /home/runner/work/obdiag/obdiag/oceanbase-diagnostic-tool_*.deb + path: ./oceanbase-diagnostic-tool_*.deb + retention-days: 3 \ No newline at end of file diff --git a/.github/workflows/test_sql_rule.yml b/.github/workflows/test_sql_rule.yml new file mode 100644 index 00000000..e02a7193 --- /dev/null +++ b/.github/workflows/test_sql_rule.yml @@ -0,0 +1,30 @@ +name: Test Full Scan Rule + +on: + push: + branches: "*" + pull_request: + branches: "*" + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # Fetch all history for proper version detection + + - name: Set up Python 3.8 + uses: actions/setup-python@v3 + with: + python-version: 3.8 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements3.txt + + - name: Run tests + run: python -m unittest discover -s test/analyzer/sql -p 'test_*.py' \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3ad42e75..990bea80 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .idea/ +.vscode/ venv/ *.pyc *site-packages/ diff --git a/clean_all_result.sh b/clean_all_result.sh index cfc510d9..bf708de6 100755 --- a/clean_all_result.sh +++ b/clean_all_result.sh @@ -1,4 +1,4 @@ -rm -rf ./gather_pack_* -rm -rf ./analyze_pack_* -rm -rf ./analyze_flt_result* -rm -rf ./check_report \ No newline at end of file +rm -rf ./obdiag_gather_pack_* +rm -rf ./obdiag_analyze_pack_* +rm -rf ./obdiag_analyze_flt_result* +rm -rf ./obdiag_check_report \ No newline at end of file diff --git a/cmd.py b/cmd.py index 1e54e4e9..2c14163e 100644 --- a/cmd.py +++ b/cmd.py @@ -624,6 +624,49 @@ def _do_command(self, obdiag): return obdiag.analyze_fuction('analyze_flt_trace', self.opts) +class ObdiagAnalyzeSQLCommand(ObdiagOriginCommand): + + def __init__(self): + super(ObdiagAnalyzeSQLCommand, self).__init__('sql', 'Analyze oceanbase sql from sql_audit') + self.parser.add_option('--host', type='string', help="tenant connection host") + self.parser.add_option('--port', type='string', help="tenant connection port") + self.parser.add_option('--password', type='string', help="tenant connection user password", default='') + self.parser.add_option('--user', type='string', help="tenant connection user name") + self.parser.add_option('--from', type='string', help="specify the start of the time range. format: 'yyyy-mm-dd hh:mm:ss'") + self.parser.add_option('--to', type='string', help="specify the end of the time range. format: 'yyyy-mm-dd hh:mm:ss'") + self.parser.add_option('--since', type='string', help="specify a time range of the last 'n' [d]ays, 'n' [h]ours or 'n' [m]inutes before now. format: <n><d|h|m>. 
example: 1h.", default='30m') + self.parser.add_option('--level', type='string', help="The alarm level, optional parameters [critical, warn, notice, ok]", default='notice') + self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + + def init(self, cmd, args): + super(ObdiagAnalyzeSQLCommand, self).init(cmd, args) + self.parser.set_usage('%s [options]' % self.prev_cmd) + return self + + def _do_command(self, obdiag): + return obdiag.analyze_fuction('analyze_sql', self.opts) + + +class ObdiagAnalyzeSQLReviewCommand(ObdiagOriginCommand): + + def __init__(self): + super(ObdiagAnalyzeSQLReviewCommand, self).__init__('sql_review', 'Analyze oceanbase sql from files') + self.parser.add_option('--host', type='string', help="tenant connection host") + self.parser.add_option('--port', type='string', help="tenant connection port") + self.parser.add_option('--password', type='string', help="tenant connection user password", default='') + self.parser.add_option('--user', type='string', help="tenant connection user name") + self.parser.add_option('--files', type='string', action="append", help="specify files") + self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + + def init(self, cmd, args): + super(ObdiagAnalyzeSQLReviewCommand, self).init(cmd, args) + self.parser.set_usage('%s [options]' % self.prev_cmd) + return self + + def _do_command(self, obdiag): + return obdiag.analyze_fuction('analyze_sql_review', self.opts) + + class ObdiagCheckCommand(ObdiagOriginCommand): def __init__(self): @@ -748,6 +791,8 @@ def __init__(self): super(ObdiagAnalyzeCommand, self).__init__('analyze', 'Analyze oceanbase diagnostic info') self.register_command(ObdiagAnalyzeLogCommand()) self.register_command(ObdiagAnalyzeFltTraceCommand()) + self.register_command(ObdiagAnalyzeSQLCommand()) + self.register_command(ObdiagAnalyzeSQLReviewCommand()) class ObdiagRCACommand(MajorCommand): diff --git a/common/ob_connector.py b/common/ob_connector.py index 1ba0259c..961044e6 100644 --- a/common/ob_connector.py +++ b/common/ob_connector.py @@ -46,6 +46,16 @@ def init(self): except Exception as e: self.stdio.verbose(e) + def __enter__(self): + """Ensures the database connection is open upon entering the 'with' block.""" + self._connect_db() + return self + + def __exit__(self, exception_type, exception_value, traceback): + """Automatically closes the database connection when exiting the 'with' block.""" + if self.conn: + self.conn.close() + def _connect_db(self): try: self.conn = mysql.connect( @@ -82,17 +92,28 @@ def execute_sql(self, sql): cursor.close() return ret - def execute_sql_return_columns_and_data(self, sql): + def execute_sql_return_columns_and_data(self, sql, params=None): + """ + Executes an SQL query and returns column names and data. + + :param sql: The SQL statement to execute, using %s as a placeholder for parameters. + :param params: A tuple or list of parameters to substitute into the SQL statement. + :return: A tuple containing a list of column names and a list of rows (each a tuple). 
+ """ if self.conn is None: self._connect_db() else: self.conn.ping(reconnect=True) - cursor = self.conn.cursor() - cursor.execute(sql) - column_names = [col[0] for col in cursor.description] - ret = cursor.fetchall() - cursor.close() - return column_names, ret + + with self.conn.cursor() as cursor: + if params: + cursor.execute(sql, params) + else: + cursor.execute(sql) + + column_names = [col[0] for col in cursor.description] + data = cursor.fetchall() + return column_names, data def execute_sql_return_cursor_dictionary(self, sql): if self.conn is None: diff --git a/common/tool.py b/common/tool.py index 8bc35010..0fbe0220 100644 --- a/common/tool.py +++ b/common/tool.py @@ -1396,3 +1396,59 @@ def get_nodes_list(context, nodes, stdio=None): return None return new_nodes return None + + +class SQLUtil(object): + re_trace = re.compile(r'''\/\*.*trace_id((?!\/\*).)*rpc_id.*\*\/''', re.VERBOSE) + re_annotation = re.compile(r'''\/\*((?!\/\*).)*\*\/''', re.VERBOSE) + re_interval = re.compile( + r'''interval\s?(\?|\-?\d+)\s?(day|hour|minute|second|microsecond|week|month|quarter|year|second_microsecond|minute_microsecond|minute_second|hour_microsecond|hour_second|hour_minute|day_microsecond|day_second|day_minute|day_hour|year_month)''', + re.VERBOSE, + ) + re_force_index = re.compile(r'''force[\s]index[\s][(]\w+[)]''', re.VERBOSE) + re_cast_1 = re.compile(r'''cast\(.*?\(.*?\)\)''', re.VERBOSE) + re_cast_2 = re.compile(r'''cast\(.*?\)''', re.VERBOSE) + re_now = re.compile(r'''now\(\)''', re.VERBOSE) + + def remove_sql_text_affects_parser(self, sql): + sql = sql.lower().strip() + sql = self.remove_hint_and_annotate(sql) + sql = self.remove_force_index(sql) + sql = self.remove_now_in_insert(sql) + sql = self.remove_semicolon(sql) + return sql + + def remove_hint_and_annotate(self, sql): + sql = sql.lower() + sql = re.sub(self.re_annotation, '', sql) + sql = re.sub(self.re_trace, '', sql) + return sql + + def replace_interval_day(self, sql): + sql = sql.lower() + sql = re.sub(self.re_interval, '?', sql) + return sql + + def remove_force_index(self, sql): + sql = sql.lower() + sql = re.sub(self.re_force_index, '', sql) + return sql + + def remove_cast(self, sql): + sql = sql.lower() + sql = re.sub(self.re_cast_1, '?', sql) + sql = re.sub(self.re_cast_2, '?', sql) + return sql + + def remove_now_in_insert(self, sql): + sql = sql.lower().lstrip() + if sql.startswith('insert'): + sql = re.sub(self.re_now, '?', sql) + return sql + + def remove_semicolon(self, sql): + sql = sql.strip() + return sql[:-1] if sql[-1] == ';' else sql + + def get_db_id(self, database_alias, user_id): + return database_alias + '-' + user_id diff --git a/conf/inner_config.yml b/conf/inner_config.yml index c0480eda..32db3c84 100644 --- a/conf/inner_config.yml +++ b/conf/inner_config.yml @@ -23,3 +23,11 @@ gather: scenes_base_path: "~/.obdiag/gather/tasks" rca: result_path: "./rca/" +analyze_sql: + output_type: "html" + result_path: "./obdiag_analyze_sql/" + sql_audit_limit: 10 + elapsed_time: 100 +analyze_sql_review: + output_type: "html" + result_path: "./obdiag_analyze_sql_review/" diff --git a/config.py b/config.py index 3b9811e5..449d8a22 100644 --- a/config.py +++ b/config.py @@ -91,6 +91,15 @@ 'rca': { 'result_path': './rca/', }, + 'analyze_sql': { + 'output_type': 'html', + 'result_path': './obdiag_analyze_sql/', + 'sql_audit_limit': 2000, + }, + 'analyze_sql_review': { + 'output_type': 'html', + 'result_path': './obdiag_analyze_sql_review/', + }, } diff --git a/core.py b/core.py index 9cccc54e..8014c864 100644 --- a/core.py 
+++ b/core.py @@ -31,6 +31,8 @@ from err import CheckStatus, SUG_SSH_FAILED from handler.analyzer.analyze_flt_trace import AnalyzeFltTraceHandler from handler.analyzer.analyze_log import AnalyzeLogHandler +from handler.analyzer.analyze_sql import AnalyzeSQLHandler +from handler.analyzer.analyze_sql_review import AnalyzeSQLReviewHandler from handler.checker.check_handler import CheckHandler from handler.checker.check_list import CheckListHandler from handler.gather.gather_log import GatherLogHandler @@ -267,6 +269,14 @@ def analyze_fuction(self, function_type, opt): self.set_context(function_type, 'analyze', config) handler = AnalyzeFltTraceHandler(self.context) handler.handle() + elif function_type == 'analyze_sql': + self.set_context(function_type, 'analyze', config) + handler = AnalyzeSQLHandler(self.context) + handler.handle() + elif function_type == 'analyze_sql_review': + self.set_context(function_type, 'analyze', config) + handler = AnalyzeSQLReviewHandler(self.context) + handler.handle() else: self._call_stdio('error', 'Not support analyze function: {0}'.format(function_type)) return False diff --git a/handler/analyzer/analyze_sql.py b/handler/analyzer/analyze_sql.py new file mode 100644 index 00000000..6825c6b0 --- /dev/null +++ b/handler/analyzer/analyze_sql.py @@ -0,0 +1,330 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
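For context on the ob_connector.py change above, a minimal usage sketch, not part of the patch: the connection details are invented, and the stdio argument stands for whatever logger the calling handler already holds. The new params argument lets callers bind values to %s placeholders instead of formatting them into the SQL text, and the __enter__/__exit__ pair makes the connector usable in a with block.

from common.ob_connector import OBConnector

# Hypothetical connection details; pass the handler's stdio logger in real code.
connector = OBConnector(ip="127.0.0.1", port=2881, username="root@sys", password="", stdio=None, timeout=100)
with connector as conn:
    # Values travel separately from the statement text; no manual quoting needed.
    columns, rows = conn.execute_sql_return_columns_and_data("select tenant_id, tenant_name from oceanbase.__all_tenant where tenant_id = %s", (1,))
    print(columns, rows)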
+ +""" +@time: 2024/5/20 +@file: analyze_sql.py +@desc: +""" +import datetime +import os +from tabulate import tabulate +from colorama import Fore, Style +from common.constant import const +from common.tool import Util +from common.tool import TimeUtils +from common.ob_connector import OBConnector +from handler.meta.sql_meta import GlobalSqlMeta +from handler.meta.html_meta import GlobalHtmlMeta +from common.tool import FileUtil +from handler.analyzer.sql.rule_manager import SQLReviewRuleManager +from handler.analyzer.sql.meta.sys_tenant_meta import SysTenantMeta +from common.command import get_observer_version_by_sql + + +class AnalyzeSQLHandler(object): + def __init__(self, context): + super(AnalyzeSQLHandler, self).__init__() + self.context = context + self.stdio = context.stdio + self.from_time_str = None + self.to_time_str = None + self.from_timestamp = None + self.to_timestamp = None + self.config_path = const.DEFAULT_CONFIG_PATH + self.db_connector_provided = False + self.tenant_name = "all" + self.db_user = None + self.sql_audit_limit = 10 + self.elapsed_time = 100 + self.level = 'notice' + self.ob_version = '4.0.0.0' + self.sql_audit_keys = [ + 'svrIp', + 'svrPort', + 'requestId', + 'clientIp', + 'tenantName', + 'tenantId', + 'dbName', + 'dbId', + 'querySql', + 'planId', + 'sqlId', + 'traceId', + 'requestTime', + 'returnRows', + 'affectedRows', + 'partitionCount', + 'retCode', + 'event0WaitTimeUs', + 'event1WaitTimeUs', + 'event2WaitTimeUs', + 'event3WaitTimeUs', + 'totalWaitTimeMicro', + 'totalWaits', + 'rpcCount', + 'planType', + 'isInnerSql', + 'isExecutorRpc', + 'isHitPlan', + 'elapsedTime', + 'cpuTime', + 'netTime', + 'netWaitTime', + 'queueTime', + 'decodeTime', + 'getPlanTime', + 'executeTime', + 'applicationWaitTime', + 'concurrencyWaitTime', + 'userIoWaitTime', + 'scheduleTime', + 'rowCacheHit', + 'bloomFilterCacheHit', + 'blockCacheHit', + 'blockIndexCacheHit', + 'diskReads', + 'retryCount', + 'tableScan', + 'consistencyLevel', + 'memstoreReadRowCount', + 'ssstoreReadRowCount', + 'planCachePlanExplain', + ] + + def init_inner_config(self): + self.inner_config = self.context.inner_config + basic_config = self.inner_config['obdiag']['basic'] + self.config_path = basic_config['config_path'] + self.local_stored_parrent_path = self.inner_config['analyze_sql']['result_path'] + sql_audit_limit = int(self.inner_config['analyze_sql']['sql_audit_limit']) + if sql_audit_limit: + self.sql_audit_limit = sql_audit_limit + elapsed_time = int(self.inner_config['analyze_sql']['elapsed_time']) + if elapsed_time: + self.elapsed_time = elapsed_time + if not os.path.exists(os.path.abspath(self.local_stored_parrent_path)): + self.stdio.warn('No such directory {0}, Now create it'.format(os.path.abspath(self.local_stored_parrent_path))) + os.makedirs(os.path.abspath(self.local_stored_parrent_path)) + return True + + def init_config(self): + ob_cluster = self.context.cluster_config + self.ob_cluster = ob_cluster + self.sys_connector = OBConnector(ip=ob_cluster.get("db_host"), port=ob_cluster.get("db_port"), username=ob_cluster.get("tenant_sys").get("user"), password=ob_cluster.get("tenant_sys").get("password"), stdio=self.stdio, timeout=100) + self.ob_cluster_name = ob_cluster.get("ob_cluster_name") + return True + + def init_ob_version(self): + self.ob_version = get_observer_version_by_sql(self.ob_cluster, self.stdio) + return True + + def __init_db_connector(self): + if self.db_user: + self.db_connector_provided = True + self.db_connector = OBConnector(ip=self.ob_cluster.get("db_host"), 
port=self.ob_cluster.get("db_port"), username=self.db_user, password=self.db_password, stdio=self.stdio, timeout=100) + else: + self.db_connector = self.sys_connector + + def init_option(self): + options = self.context.options + from_option = Util.get_option(options, 'from') + to_option = Util.get_option(options, 'to') + since_option = Util.get_option(options, 'since') + db_user_option = Util.get_option(options, 'user') + if db_user_option: + tenant_name = self.__extract_tenant_name(db_user_option) + if tenant_name: + self.db_user = db_user_option + self.tenant_name = tenant_name + else: + return False + db_password_option = Util.get_option(options, 'password') + self.db_password = db_password_option + level_option = Util.get_option(options, 'level') + if level_option: + self.level = level_option + if from_option is not None and to_option is not None: + try: + from_timestamp = TimeUtils.parse_time_str(from_option) + to_timestamp = TimeUtils.parse_time_str(to_option) + self.from_time_str = from_option + self.to_time_str = to_option + except: + self.stdio.exception('Error: Datetime is invalid. Must be in format yyyy-mm-dd hh:mm:ss. from_datetime={0}, to_datetime={1}'.format(from_option, to_option)) + return False + if to_timestamp <= from_timestamp: + self.stdio.error('Error: from datetime is larger than to datetime, please check.') + return False + elif (from_option is None or to_option is None) and since_option is not None: + now_time = datetime.datetime.now() + self.to_time_str = (now_time + datetime.timedelta(minutes=1)).strftime('%Y-%m-%d %H:%M:%S') + self.from_time_str = (now_time - datetime.timedelta(seconds=TimeUtils.parse_time_length_to_sec(since_option))).strftime('%Y-%m-%d %H:%M:%S') + self.stdio.print('analyze sql from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) + else: + self.stdio.warn('No time option provided, default processing is based on the last 30 minutes') + now_time = datetime.datetime.now() + self.to_time_str = (now_time + datetime.timedelta(minutes=1)).strftime('%Y-%m-%d %H:%M:%S') + if since_option is not None: + self.from_time_str = (now_time - datetime.timedelta(seconds=TimeUtils.parse_time_length_to_sec(since_option))).strftime('%Y-%m-%d %H:%M:%S') + else: + self.from_time_str = (now_time - datetime.timedelta(minutes=30)).strftime('%Y-%m-%d %H:%M:%S') + self.stdio.print('analyze sql from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) + self.from_timestamp = TimeUtils.datetime_to_timestamp(self.from_time_str, self.stdio) + self.to_timestamp = TimeUtils.datetime_to_timestamp(self.to_time_str, self.stdio) + return True + + def handle(self): + if not self.init_option(): + self.stdio.error('init option failed') + return False + if not self.init_inner_config(): + self.stdio.error('init inner config failed') + return False + if not self.init_config(): + self.stdio.error('init config failed') + return False + if not self.init_ob_version(): + self.stdio.error('init ob version failed') + return False + self.__init_db_connector() + self.local_store_dir = os.path.join(self.local_stored_parrent_path, "sql_{0}".format(TimeUtils.timestamp_to_filename_time(TimeUtils.get_current_us_timestamp()))) + if not os.path.exists(os.path.abspath(self.local_store_dir)): + os.makedirs(os.path.abspath(self.local_store_dir)) + self.stdio.print("Use {0} as result dir.".format(self.local_store_dir)) + raw_results = self.__select_sql_audit() + results = self.__filter_max_elapsed_time_with_same_sql_id(raw_results) + for item in results: + 
item['planCachePlanExplain'] = self.__get_plan_cache_plan_explain(item) + item['diagnosticEntries'] = self.__parse_sql_review(item["querySql"]) + html_result = self.__generate_html_result(results) + FileUtil.write_append(os.path.join(self.local_store_dir, "sql_analyze_result.html"), html_result) + self.__print_result() + + def __extract_tenant_name(self, username): + """ + Extracts the tenant name from the given OBClient username format. + Parameters: + username (str): The username portion of the OBClient connection string, formatted as 'user@tenantName' or 'user@tenantName#clusterName' or 'clusterName:tenantName:user'. + Returns: + The tenant name, or None if parsing fails + """ + # Check for 'user@tenantName' or 'user@tenantName#clusterName' format + if "@" in username: + parts = username.split('@') + if len(parts) == 2: + return parts[1].split('#')[0] if '#' in parts[1] else parts[1] + + # Check for 'clusterName:tenantName:user' format + elif ":" in username: + parts = username.split(':') + if len(parts) >= 3: + return parts[1] + + self.stdio.error("Unable to recognize the user name format") + return None + + def __select_sql_audit(self): + sql = str(GlobalSqlMeta().get_value(key="get_sql_audit_ob4_for_sql_review")) + replacements = {"##REPLACE_REQUEST_FROM_TIME##": str(self.from_timestamp), "##REPLACE_REQUEST_TO_TIME##": str(self.to_timestamp), "##REPLACE_ELAPSED_TIME##": str(self.elapsed_time), "##REPLACE_LIMIT##": str(self.sql_audit_limit)} + for old, new in replacements.items(): + sql = sql.replace(old, new) + self.stdio.verbose("execute SQL: {0}".format(sql)) + columns, rows = self.db_connector.execute_sql_return_columns_and_data(sql) + result = [] + for row in rows: + result.append(dict(zip(columns, row))) + self.stdio.print("execute select sql_audit SQL complete, the length of raw result is {0}".format(len(result))) + return result + + def __get_plan_cache_plan_explain(self, data): + meta = SysTenantMeta(self.sys_connector, self.stdio, self.ob_version) + column_names, table_data = meta.get_plain_explain_raw(data['tenantId'], data['svrIp'], data['svrPort'], data['planId']) + formatted_table = tabulate(table_data, headers=column_names, tablefmt="grid") + return formatted_table + + def __filter_max_elapsed_time_with_same_sql_id(self, data): + # Create a dictionary to hold the max elapsed time for each db_id, sql_id pair + max_elapsed_times = {} + for item in data: + key = (item['tenantId'], item['dbId'], item['sqlId']) + if key not in max_elapsed_times or item['elapsedTime'] > max_elapsed_times[key]['elapsedTime']: + max_elapsed_times[key] = item + # Extract the values which are the filtered list + filtered_data = list(max_elapsed_times.values()) + self.stdio.print("filter max elapsed time with same sql_id complete, raw data length: {0}, filtered data length: {1}".format(len(data), len(filtered_data))) + return filtered_data + + def __parse_sql_review(self, sql): + rules = SQLReviewRuleManager() + result = rules.manager.analyze_sql_statement(sql, self.level) + return result + + def __generate_current_row_selected_keys(self, diagnostics, keys, rowspan_length): + current_row = [f"<td rowspan={rowspan_length}>{diagnostics[key]}</td>" for key in keys] + return current_row + + def __generate_html_table(self, diagnostics): + rows = [] + rowspan_length = len(diagnostics['diagnosticEntries']) + current_row = self.__generate_current_row_selected_keys(diagnostics, self.sql_audit_keys, rowspan_length) + table_head = ''.join(current_row) + for idx, diag in enumerate(diagnostics['diagnosticEntries']): + if idx == 0: + # Start a new row; the sql_audit columns carry rowspan equal to the number of diagnostics. + row = [table_head, f"<td>{diag.class_name}</td>", f"<td>{diag.description}</td>", f"<td>{diag.level.string}</td>", f"<td>{diag.suggestion}</td>"] + rows.append("<tr>" + "".join(row) + "</tr>") + else: + rows.append("<tr>" + f"<td>{diag.class_name}</td>" + f"<td>{diag.description}</td>" + f"<td>{diag.level.string}</td>" + f"<td>{diag.suggestion}</td>" + "</tr>") + return "".join(rows) + + def __generate_table_headers(self): + headers_html = "".join([f"<th>{item}</th>" for item in self.sql_audit_keys]) + return headers_html + + def __generate_html_result(self, all_results): + full_html = "" + table_headers = self.__generate_table_headers() + all_sql_entries_html = "" + i = 0 + for data in all_results: + i += 1 + sql_entries_html = self.__generate_html_table(data) + all_sql_entries_html += sql_entries_html + full_html += ( + GlobalHtmlMeta().get_value(key="analyze_sql_html_head_template") + + f"""
+            <p>Command: "obdiag analyze sql"</p>
+            <p>Files: "obdiag analyze sql"</p>
+            <h3>租户诊断结果</h3>
+            <table>
+                <thead>
+                    <tr>
+                        {table_headers}
+                        <th>诊断规则</th>
+                        <th>规则描述</th>
+                        <th>规则级别</th>
+                        <th>诊断建议</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {all_sql_entries_html}
+                </tbody>
+            </table>
+ """ + ) + full_html += GlobalHtmlMeta().get_value(key="html_footer_temple") + return full_html + + def __print_result(self): + self.stdio.print(Fore.YELLOW + "\nAnalyze sql results stored in this directory: {0}\n".format(self.local_store_dir) + Style.RESET_ALL) diff --git a/handler/analyzer/analyze_sql_review.py b/handler/analyzer/analyze_sql_review.py new file mode 100644 index 00000000..7a43b82c --- /dev/null +++ b/handler/analyzer/analyze_sql_review.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/20 +@file: analyze_sql_review.py +@desc: +""" +import os +import sqlparse +import json +from colorama import Fore, Style +from common.constant import const +from common.tool import Util +from common.tool import TimeUtils +from common.tool import FileUtil +from common.ob_connector import OBConnector +from handler.analyzer.sql.rule_manager import SQLReviewRuleManager +from handler.meta.sql_meta import GlobalSqlMeta +from handler.meta.html_meta import GlobalHtmlMeta + + +class AnalyzeSQLReviewHandler(object): + def __init__(self, context): + super(AnalyzeSQLReviewHandler, self).__init__() + self.context = context + self.stdio = context.stdio + self.from_time_str = None + self.to_time_str = None + self.config_path = const.DEFAULT_CONFIG_PATH + self.analyze_files_list = None + self.directly_analyze_files = False + + def init_inner_config(self): + self.stdio.verbose("init inner config start") + self.inner_config = self.context.inner_config + basic_config = self.inner_config['obdiag']['basic'] + self.config_path = basic_config['config_path'] + self.local_stored_parrent_path = self.inner_config['analyze_sql_review']['result_path'] + if not os.path.exists(os.path.abspath(self.local_stored_parrent_path)): + self.stdio.warn('No such directory {0}, Now create it'.format(os.path.abspath(self.local_stored_parrent_path))) + os.makedirs(os.path.abspath(self.local_stored_parrent_path)) + self.stdio.verbose("init inner config success") + return True + + def init_config(self): + self.stdio.verbose("Init config start") + ob_cluster = self.context.cluster_config + self.ob_cluster = ob_cluster + self.sys_connector = OBConnector(ip=ob_cluster.get("db_host"), port=ob_cluster.get("db_port"), username=ob_cluster.get("tenant_sys").get("user"), password=ob_cluster.get("tenant_sys").get("password"), stdio=self.stdio, timeout=100) + self.ob_cluster_name = ob_cluster.get("ob_cluster_name") + self.stdio.verbose("Init config success") + return True + + def __init_db_connector(self): + if self.db_user: + self.stdio.verbose("Init db connector start") + self.db_connector_provided = True + self.db_connector = OBConnector(ip=self.ob_cluster.get("db_host"), port=self.ob_cluster.get("db_port"), username=self.db_user, password=self.db_password, stdio=self.stdio, timeout=100) + self.stdio.verbose("Init db connector end") + else: + self.db_connector = self.sys_connector + + def init_option(self): + options = self.context.options + files_option = Util.get_option(options, 'files') + if 
files_option: + self.directly_analyze_files = True + self.analyze_files_list = files_option + db_user_option = Util.get_option(options, 'user') + db_password_option = Util.get_option(options, 'password') + tenant_name_option = Util.get_option(options, 'tenant_name') + if tenant_name_option is not None: + self.tenant_name = tenant_name_option + self.db_user = db_user_option + self.db_password = db_password_option + return True + + def handle(self): + if not self.init_option(): + self.stdio.error('init option failed') + return False + if not self.init_inner_config(): + self.stdio.error('init inner config failed') + return False + if not self.init_config(): + self.stdio.error('init config failed') + return False + self.__init_db_connector() + self.local_store_dir = os.path.join(self.local_stored_parrent_path, "sql_{0}".format(TimeUtils.timestamp_to_filename_time(TimeUtils.get_current_us_timestamp()))) + if not os.path.exists(os.path.abspath(self.local_store_dir)): + os.makedirs(os.path.abspath(self.local_store_dir)) + self.stdio.print("Use {0} as result dir.".format(self.local_store_dir)) + if self.directly_analyze_files: + all_results = self.__directly_analyze_files() + results = self.__parse_results(all_results) + html_result = self.__generate_html_result(results) + FileUtil.write_append(os.path.join(self.local_store_dir, "sql_review_result.html"), html_result) + else: + all_results = self.__analyze_sql_audit() + self.__print_result() + + def __directly_analyze_files(self): + sql_files = self.__get_sql_file_list() + if len(sql_files) == 0: + self.stdio.warn("Failed to find SQL files from the --files option provided") + return None + file_results = {} + rules = SQLReviewRuleManager() + for file in sql_files: + # Use a fresh result dict per file so results do not leak across files. + sql_results = {} + sql_list = self.__parse_sql_file(file) + for sql in sql_list: + result = rules.manager.analyze_sql_statement(sql) + sql_results[sql] = result + file_results[file] = sql_results + return file_results + + def __analyze_sql_audit(self): + return {} + + def __get_sql_file_list(self): + """ + :param: + :return: sql_files + """ + sql_files = [] + if self.analyze_files_list and len(self.analyze_files_list) > 0: + for path in self.analyze_files_list: + if os.path.exists(path): + if os.path.isfile(path): + sql_files.append(path) + else: + sql_file_list = FileUtil.find_all_file(path) + if len(sql_file_list) > 0: + sql_files.extend(sql_file_list) + self.stdio.print("The list of SQL files to be processed is as follows: {0}".format(sql_files)) + return sql_files + + def __parse_sql_file(self, file_path): + with open(file_path, 'r') as file: + sql_content = file.read() + statements = sqlparse.split(sql_content) + sql_list = [stmt for stmt in statements if stmt.strip()] + return sql_list + + def __parse_results(self, results): + reports = [] + for file_name, file_results in results.items(): + diagnostic_entries = [] + for sql, sql_results in file_results.items(): + diagnostics = [] + for sql_result in sql_results: + # Keep the Level enum (not .value) so the HTML renderer can call .string on it. + diagnostic = {"ruleClassName": sql_result.class_name, "ruleName": sql_result.rule_name, "ruleDescription": sql_result.description, "ruleLevel": sql_result.level, "suggestion": sql_result.suggestion} + diagnostics.append(diagnostic) + diagnostic_entry = {"sqlText": sql, "diagnostics": diagnostics} + diagnostic_entries.append(diagnostic_entry) + report = {"command": "obdiag analyze sql_review", "options": {"files": file_name}, "diagnosticEntries": diagnostic_entries} + reports.append(report) + return reports + + def __generate_html_table(self, sql_entry): + diagnostics = sql_entry["diagnostics"] + sql_text = sql_entry["sqlText"] + rows = [] + current_row = [f"<td rowspan={len(diagnostics)}>{sql_text}</td>"] + + for idx, diag in enumerate(diagnostics): + if idx == 0: + # Start a new row with SQL text having rowspan equal to the number of diagnostics. + row = current_row + [f"<td>{diag['ruleClassName']}</td>", f"<td>{diag['ruleDescription']}</td>", f"<td>{diag['ruleLevel'].string}</td>", f"<td>{diag['suggestion']}</td>"] + rows.append("<tr>" + "".join(row) + "</tr>") + else: + rows.append("<tr>" + f"<td>{diag['ruleClassName']}</td>" + f"<td>{diag['ruleDescription']}</td>" + f"<td>{diag['ruleLevel'].string}</td>" + f"<td>{diag['suggestion']}</td>" + "</tr>") + return "".join(rows) + + def __generate_html_result(self, all_results): + full_html = "" + for data in all_results: + diagnostic_entries = data["diagnosticEntries"] + sql_entries_html = "".join([self.__generate_html_table(entry) for entry in diagnostic_entries]) + full_html += ( + GlobalHtmlMeta().get_value(key="sql_review_html_head_template") + + f"""
+            <p>Command: {data["command"]}</p>
+            <p>Files: {data["options"]["files"]}</p>
+            <h3>诊断结果</h3>
+            <table>
+                <thead>
+                    <tr>
+                        <th>SQL文本</th>
+                        <th>诊断规则</th>
+                        <th>规则描述</th>
+                        <th>规则级别</th>
+                        <th>调优建议</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {sql_entries_html}
+                </tbody>
+            </table>
+ """ + ) + full_html += GlobalHtmlMeta().get_value(key="html_footer_temple") + return full_html + + def __print_result(self): + self.stdio.print(Fore.YELLOW + "\nAnalyze sql_review results stored in this directory: {0}\n".format(self.local_store_dir) + Style.RESET_ALL) diff --git a/handler/analyzer/sql/__init__.py b/handler/analyzer/sql/__init__.py new file mode 100644 index 00000000..69b7c2fa --- /dev/null +++ b/handler/analyzer/sql/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/21 +@file: __init__.py +@desc: +""" diff --git a/handler/analyzer/sql/engine.py b/handler/analyzer/sql/engine.py new file mode 100644 index 00000000..699406af --- /dev/null +++ b/handler/analyzer/sql/engine.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/21 +@file: engine.py +@desc: +""" + +from sqlgpt_parser.parser.oceanbase_parser import parser as oceanbase_parser + + +class Engine(object): + def __new__(cls): + singleton = cls.__dict__.get('__singleton__') + if singleton is not None: + return singleton + + cls.__singleton__ = singleton = object.__new__(cls) + + return singleton + + def parse(self, sql, tracking=False): + return oceanbase_parser.parse(sql, tracking=tracking) diff --git a/handler/analyzer/sql/meta/_init_.py b/handler/analyzer/sql/meta/_init_.py new file mode 100644 index 00000000..69b7c2fa --- /dev/null +++ b/handler/analyzer/sql/meta/_init_.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/21 +@file: __init__.py +@desc: +""" diff --git a/handler/analyzer/sql/meta/metadata.py b/handler/analyzer/sql/meta/metadata.py new file mode 100644 index 00000000..faf56b4b --- /dev/null +++ b/handler/analyzer/sql/meta/metadata.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. 
+# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/21 +@file: metadata.py +@desc: +""" + +from typing import List +from enum import Enum, unique + + +@unique +class IndexType(Enum): + PRIMARY = '1.primary' + UNIQUE = '2.unique' + NORMAL = '3.normal' + + +class MetaData(object): + def __init__(self, table_list: List, statistics_list: List): + self.table_list = table_list + self.statistics_list = statistics_list + + +class Table(object): + def __init__(self, database_name, table_name, column_list: List, index_list: List, table_rows): + self.table_name = table_name + self.database_name = database_name + self.column_list = column_list + self.index_list = index_list + self.table_rows = table_rows + + +class Column(object): + def __init__(self, column_name, column_type, column_nullable): + self.column_name = column_name + self.column_type = column_type + self.column_nullable = column_nullable + + +class Index(object): + def __init__( + self, + index_name, + column_list: List, + index_type: IndexType, + index_all_match=None, + index_back=None, + extract_range=None, + has_interesting_order=None, + ): + self.index_name = index_name + self.column_list = column_list + self.column_count = len(column_list) + self.index_type = index_type + self.index_all_match = index_all_match + self.index_back = index_back + self.extract_range = extract_range + self.has_interesting_order = has_interesting_order + + +class Selectivity(object): + def __init__(self, column_name, min_value, max_value, ndv=None): + self.column_name = column_name + self.min_value = min_value + self.max_value = max_value + self.ndv = ndv + + +class Statistics(object): + def __init__(self, database_name, table_name, selectivity_list: List): + self.database_name = database_name + self.table_name = table_name + self.selectivity_list = selectivity_list diff --git a/handler/analyzer/sql/meta/sys_tenant_meta.py b/handler/analyzer/sql/meta/sys_tenant_meta.py new file mode 100644 index 00000000..64a83369 --- /dev/null +++ b/handler/analyzer/sql/meta/sys_tenant_meta.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
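For orientation, a small sketch, not part of the patch, of how the metadata.py classes above compose; the database, table, columns, and row count are invented for illustration:

from handler.analyzer.sql.meta.metadata import Column, Index, IndexType, MetaData, Table

# A hypothetical two-column table with a primary key on c1.
c1 = Column('c1', 'int', column_nullable=False)
c2 = Column('c2', 'varchar(64)', column_nullable=True)
primary = Index('primary', [c1], IndexType.PRIMARY)
t1 = Table('test_db', 't1', [c1, c2], [primary], table_rows=10000)
meta = MetaData(table_list=[t1], statistics_list=[])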
+ +""" +@time: 2024/6/21 +@file: sys_tenant_meta.py +@desc: +""" +from handler.meta.sql_meta import GlobalSqlMeta +from common.ob_connector import OBConnector +from common.tool import StringUtils + + +class SysTenantMeta(object): + + def __init__(self, connector: OBConnector, stdio, ob_version='4.0.0.0'): + self.sys_connector = connector + self.stdio = stdio + self.ob_version = ob_version + + def get_tables(self, tenant_id: int, db_name: str): + if StringUtils.compare_versions_greater(self.ob_version, '4.0.0.0'): + sql = str(GlobalSqlMeta().get_value(key="get_tables_for_ob4")) + else: + sql = str(GlobalSqlMeta().get_value(key="get_tables")) + sql = sql.replace('##REPLACE_DATABASE_NAME##', db_name) + self.stdio.verbose("get tables execute SQL: {0}".format(sql)) + columns, rows = self.sys_connector.execute_sql_return_columns_and_data(sql) + results = [dict(zip(columns, row)) for row in rows]  # one dict per row + return results + + def get_table_ddl(self, tenant_id: int, db_name: str, table_name: str): + if StringUtils.compare_versions_greater(self.ob_version, '4.0.0.0'): + sql = str(GlobalSqlMeta().get_value(key="get_tables_for_ob4")) + else: + sql = str(GlobalSqlMeta().get_value(key="get_tables")) + sql = sql.replace('##REPLACE_DATABASE_NAME##', db_name) + columns, rows = self.sys_connector.execute_sql_return_columns_and_data(sql) + results = [dict(zip(columns, row)) for row in rows] + return results + + def get_databases(self, tenant_id: int): + if StringUtils.compare_versions_greater(self.ob_version, '4.0.0.0'): + sql = str(GlobalSqlMeta().get_value(key="get_tables_for_ob4")) + else: + sql = str(GlobalSqlMeta().get_value(key="get_tables")) + sql = sql.replace('##REPLACE_DATABASE_NAME##', str(tenant_id)) + columns, rows = self.sys_connector.execute_sql_return_columns_and_data(sql) + results = [dict(zip(columns, row)) for row in rows] + return results + + def get_ob_database_id(self, key): + if StringUtils.compare_versions_greater(self.ob_version, '4.0.0.0'): + sql = str(GlobalSqlMeta().get_value(key="get_tables_for_ob4")) + else: + sql = str(GlobalSqlMeta().get_value(key="get_tables")) + sql = sql.replace('##REPLACE_DATABASE_NAME##', str(key)) + columns, rows = self.sys_connector.execute_sql_return_columns_and_data(sql) + results = [dict(zip(columns, row)) for row in rows] + return results + + def get_ob_tenant_id(self, key): + if StringUtils.compare_versions_greater(self.ob_version, '4.0.0.0'): + sql = str(GlobalSqlMeta().get_value(key="get_tables_for_ob4")) + else: + sql = str(GlobalSqlMeta().get_value(key="get_tables")) + sql = sql.replace('##REPLACE_DATABASE_NAME##', str(key)) + columns, rows = self.sys_connector.execute_sql_return_columns_and_data(sql) + results = [dict(zip(columns, row)) for row in rows] + return results + + def get_database_name(self, tenant_id, database_id): + sql = str(GlobalSqlMeta().get_value(key="get_database_name")) + sql = sql.replace('##REPLACE_TENANT_ID##', str(tenant_id)).replace('##REPLACE_DATABASE_ID##', str(database_id)) + columns, rows = self.sys_connector.execute_sql_return_columns_and_data(sql) + results = [dict(zip(columns, row)) for row in rows] + return results + + def get_plain_explain(self, tenant_id: int, svr_ip: str, port: int, plan_id: int): + if StringUtils.compare_versions_greater(self.ob_version, '4.0.0.0'): + sql = str(GlobalSqlMeta().get_value(key="get_plan_explains_for_ob4")) + else: + sql = str(GlobalSqlMeta().get_value(key="get_plan_explains")) + replacements = {"##REPLACE_TENANT_ID##": str(tenant_id), "##REPLACE_SVR_IP##": svr_ip, "##REPLACE_SVR_PORT##": str(port), "##REPLACE_PLAN_ID##": str(plan_id)} + for old, new in replacements.items(): + sql = sql.replace(old, new) + columns, rows = self.sys_connector.execute_sql_return_columns_and_data(sql) + results = [dict(zip(columns, row)) for row in rows] + return results + + def get_plain_explain_raw(self, tenant_id: int, svr_ip: str, port: int, plan_id: int): + if StringUtils.compare_versions_greater(self.ob_version, '4.0.0.0'): + sql = str(GlobalSqlMeta().get_value(key="get_plan_explains_for_ob4")) + else: + sql = str(GlobalSqlMeta().get_value(key="get_plan_explains")) + replacements = {"##REPLACE_TENANT_ID##": str(tenant_id), "##REPLACE_SVR_IP##": svr_ip, "##REPLACE_SVR_PORT##": str(port), "##REPLACE_PLAN_ID##": str(plan_id)} + for old, new in replacements.items(): + sql = sql.replace(old, new) + columns, rows = self.sys_connector.execute_sql_return_columns_and_data(sql) + return columns, rows diff --git a/handler/analyzer/sql/rule_manager.py b/handler/analyzer/sql/rule_manager.py new file mode 100644 index 00000000..259369c2 --- /dev/null +++ b/handler/analyzer/sql/rule_manager.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/6/11 +@file: rule_manager.py +@desc: +""" + +from typing import Dict, Type, List +from handler.analyzer.sql.rules.abstract_rule import AbstractRule +from handler.analyzer.sql.rules.result import Result +from handler.analyzer.sql.rules.review.arithmetic import ArithmeticRule +from handler.analyzer.sql.rules.review.full_scan import FullScanRule +from handler.analyzer.sql.rules.review.is_null import IsNullRule +from handler.analyzer.sql.rules.review.large_in_clause import LargeInClauseAdjustedRule +from handler.analyzer.sql.rules.review.multi_table_join import MultiTableJoinRule +from handler.analyzer.sql.rules.review.select_all import SelectAllRule +from handler.analyzer.sql.rules.review.update_delete_multi_table import UpdateDeleteMultiTableRule +from handler.analyzer.sql.rules.review.update_delete_without_where_or_true_condition import UpdateDeleteWithoutWhereOrTrueConditionRule +from handler.analyzer.sql.rules.level import Level + + +class RuleManager(object): + def __init__(self): + self._registered_rules: Dict[str, Type[AbstractRule]] = {} + + def register_rule(self, rule_class: Type[AbstractRule]): + """ + Register a new rule class. + :param rule_class: the type of the rule class. + """ + self._registered_rules[rule_class.rule_name] = rule_class + + def analyze_sql_statement(self, sql_statement, level_str='notice') -> List[Result]: + """ + Apply every registered rule to the SQL statement and collect the results. + :param sql_statement: the SQL statement to analyze. + :return: a list of rule check results, filtered by the requested level. + """ + level = Level.from_string(level_str) + rule_results = [] + for rule_class in self._registered_rules.values(): + rule_instance = rule_class() + result = rule_instance.match(sql_statement) + suggestion = rule_instance.suggestion(sql_statement) + if result: + if suggestion.level >= level: + rule_results.append(suggestion) + else: + if level <= Level.OK: + suggestion = Result(rule_class.rule_name, Level.OK, "No issues found with this rule.") + rule_results.append(suggestion) + return rule_results + + +class 
SQLReviewRuleManager(object): + def __init__(self): + self.manager = RuleManager() + self.manager.register_rule(SelectAllRule) + self.manager.register_rule(ArithmeticRule) + self.manager.register_rule(FullScanRule) + self.manager.register_rule(IsNullRule) + self.manager.register_rule(LargeInClauseAdjustedRule) + self.manager.register_rule(MultiTableJoinRule) + self.manager.register_rule(UpdateDeleteMultiTableRule) + self.manager.register_rule(UpdateDeleteWithoutWhereOrTrueConditionRule) diff --git a/handler/analyzer/sql/rules/__init__.py b/handler/analyzer/sql/rules/__init__.py new file mode 100644 index 00000000..69b7c2fa --- /dev/null +++ b/handler/analyzer/sql/rules/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/21 +@file: __init__.py +@desc: +""" diff --git a/handler/analyzer/sql/rules/abstract_rule.py b/handler/analyzer/sql/rules/abstract_rule.py new file mode 100644 index 00000000..c5897c34 --- /dev/null +++ b/handler/analyzer/sql/rules/abstract_rule.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/22 +@file: abstract_rule.py +@desc: +""" + +from abc import ABCMeta, abstractmethod + +from sqlgpt_parser.parser.tree.statement import Statement + + +class AbstractRule(metaclass=ABCMeta): + def match(self, root: Statement, context=None) -> bool: + return True + + @abstractmethod + def suggestion(self, root: Statement, context=None): + pass diff --git a/handler/analyzer/sql/rules/level.py b/handler/analyzer/sql/rules/level.py new file mode 100644 index 00000000..bba0ec6c --- /dev/null +++ b/handler/analyzer/sql/rules/level.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
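A hedged sketch of how a new rule plugs into the RuleManager surface above; the rule here is invented for illustration and always reports OK:

from handler.analyzer.sql.rule_manager import RuleManager
from handler.analyzer.sql.rules.abstract_rule import AbstractRule
from handler.analyzer.sql.rules.level import Level
from handler.analyzer.sql.rules.result import Result


class NoopRule(AbstractRule):
    # rule_name is the key RuleManager uses to register the class.
    rule_name = "noop_rule"

    def match(self, root, context=None) -> bool:
        # A real rule walks the parsed Statement AST here with a visitor.
        return False

    def suggestion(self, root, context=None):
        return Result(self.rule_name, Level.OK, "No issues found with this rule.")


manager = RuleManager()
manager.register_rule(NoopRule)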
+ +""" +@time: 2024/5/22 +@file: level.py +@desc: +""" + +from enum import Enum, unique + + +@unique +class Level(Enum): + OK = (1, 'ok') + NOTICE = (2, 'notice') + WARN = (3, 'warn') + CRITICAL = (4, 'critical') + + def __lt__(self, other): + if self.__class__ is other.__class__: + return self.value[0] < other.value[0] + return NotImplemented + + def __le__(self, other): + if self.__class__ is other.__class__: + return self.value[0] <= other.value[0] + return NotImplemented + + def __gt__(self, other): + if self.__class__ is other.__class__: + return self.value[0] > other.value[0] + return NotImplemented + + def __ge__(self, other): + if self.__class__ is other.__class__: + return self.value[0] >= other.value[0] + return NotImplemented + + @classmethod + def from_string(cls, s): + for member in cls: + if member.value[1] == s: + return member + raise ValueError(f"No such level: {s}") + + @property + def string(self): + return self.value[1] diff --git a/handler/analyzer/sql/rules/result.py b/handler/analyzer/sql/rules/result.py new file mode 100644 index 00000000..eaf627a4 --- /dev/null +++ b/handler/analyzer/sql/rules/result.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/20 +@file: result.py +@desc: +""" +import json + + +class Result(object): + def __init__(self, name, level, suggestion): + self.class_name = name + self.rule_name = name + self.level = level + self.suggestion = suggestion + self.description = suggestion + + def __str__(self): + return json.dumps({"class_name": self.rule_name, "rule_name": self.rule_name, "level": self.level.value, "suggestion": self.suggestion, "description": self.description}, indent=5) diff --git a/handler/analyzer/sql/rules/review/__init__.py b/handler/analyzer/sql/rules/review/__init__.py new file mode 100644 index 00000000..69b7c2fa --- /dev/null +++ b/handler/analyzer/sql/rules/review/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/21 +@file: __init__.py +@desc: +""" diff --git a/handler/analyzer/sql/rules/review/arithmetic.py b/handler/analyzer/sql/rules/review/arithmetic.py new file mode 100644 index 00000000..4fa9ea6d --- /dev/null +++ b/handler/analyzer/sql/rules/review/arithmetic.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/20 +@file: arithmetic.py +@desc: +""" +from handler.analyzer.sql.rules.level import Level +from sqlgpt_parser.parser.tree.expression import QualifiedNameReference +from sqlgpt_parser.parser.tree.statement import Statement +from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor +from handler.analyzer.sql.rules.abstract_rule import AbstractRule +from handler.analyzer.sql.rules.result import Result + + +class ArithmeticRule(AbstractRule): + rule_name = "arithmetic_rule" + rule_description = """ + Field operations are not recommended. + Example: a + 1 > 2 => a > 2 - 1 + """ + + def match(self, root: Statement, catalog=None) -> bool: + class Visitor(DefaultTraversalVisitor): + def __init__(self): + self.match = False + + def visit_arithmetic_binary(self, node, context): + if isinstance(node.left, QualifiedNameReference) or isinstance(node.right, QualifiedNameReference): + self.match = True + + visitor = Visitor() + visitor.process(root, None) + + return visitor.match + + def suggestion(self, root: Statement, catalog=None): + if not self.match(root, catalog): + return Result(self.rule_name, Level.OK, "No improper field operations detected, query is optimized.") + else: + return Result(self.rule_name, Level.NOTICE, self.rule_description) diff --git a/handler/analyzer/sql/rules/review/full_scan.py b/handler/analyzer/sql/rules/review/full_scan.py new file mode 100644 index 00000000..38aec1fc --- /dev/null +++ b/handler/analyzer/sql/rules/review/full_scan.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/20 +@file: full_scan.py +@desc: +""" +from handler.analyzer.sql.rules.level import Level +from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor +from handler.analyzer.sql.rules.result import Result +from handler.analyzer.sql.rules.abstract_rule import AbstractRule +from sqlgpt_parser.parser.tree.literal import StringLiteral +from sqlgpt_parser.parser.tree.statement import Statement + + +class FullScanRule(AbstractRule): + rule_name = "full_scan_rule" + rule_description = """ + Online query full table scan is not recommended. + Exceptions are: + 1. very small table + 2. very low frequency + 3. the table/result set returned is very small (within 100 records / 100 KB). 
+ """ + + def match(self, root: Statement, catalog=None) -> bool: + """ + match: + select 1 from a + select 1 from a where b != / <> + select 1 from a where b not like + select 1 from a where b not in + select 1 from a where not exists + select 1 from a where b like %a / %a% + + not match: + select * from a left join b on (a.id = b.id) and a.c=1 + + :param root: + :param catalog: + :return: + """ + + # Remove clauses such as exists / != / <> / not in / not like / like %a + class Remove_Visitor(DefaultTraversalVisitor): + def visit_comparison_expression(self, node, context): + type = node.type + if type in ('!=', '<>'): + node.left = None + node.right = None + node.type = None + else: + self.process(node.left, context) + self.process(node.right, context) + return None + + def visis_in_predicate(self, node, context): + if node.is_not: + node.is_not = None + node.value = None + node.value_list = None + return None + + def visit_like_predicate(self, node, context): + process_flag = True + + pattern = node.pattern + + if isinstance(pattern, StringLiteral): + value = pattern.value + if value.startswith('%') or node.is_not: + process_flag = False + node.pattern = None + node.value = None + node.escape = None + + if process_flag: + self.process(node.value, context) + self.process(node.pattern, context) + if node.escape is not None: + self.process(node.escape, context) + return None + + # Determine whether there is a expression that can extract query range, if there is, it is not a full table scan + class Query_Range_Visitor(DefaultTraversalVisitor): + def __init__(self): + self.match = True + + def visit_comparison_expression(self, node, context): + type = node.type + if type and type in ('=', '>', '<', '>=', '<='): + self.match = False + if node.left: + self.process(node.left, context) + if node.right: + self.process(node.right, context) + return None + + def visit_in_predicate(self, node, context): + if node.is_not: + # Even though it's a NOT IN, it still suggests a non-full scan attempt + self.match = False + else: + self.process(node.value, context) + self.process(node.value_list, context) + + def visit_like_predicate(self, node, context): + if node.pattern and node.value: + pattern = node.pattern + if isinstance(pattern, StringLiteral): + value = pattern.value + if value.endswith('%'): + self.match = False + + if node.value: + self.process(node.value, context) + if node.pattern: + self.process(node.pattern, context) + if node.escape: + self.process(node.escape, context) + return None + + # Add handling for NOT EXISTS + def visit_exists_predicate(self, node, context): + if node.is_not: + # NOT EXISTS can also imply a specific range consideration + self.match = False + else: + self.process(node.subquery, context) + + def visit_between_predicate(self, node, context): + if not node.is_not: + self.match = False + + self.process(node.value, context) + self.process(node.min, context) + self.process(node.max, context) + + return None + + def visit_not_expression(self, node, context): + node.value = None + self.match = True + return None + + remove_visitor = Remove_Visitor() + remove_visitor.process(root, None) + + query_range_visitor = Query_Range_Visitor() + query_range_visitor.process(root, None) + + return query_range_visitor.match + + def suggestion(self, root: Statement, catalog=None) -> Result: + if self.match(root, catalog): + suggestion_text = "Detected a potential full table scan which may impact performance. 
" "Consider adding indexes, refining WHERE clauses, or restructuring the query to utilize existing indexes." + return Result(self.rule_name, Level.WARN, suggestion_text) + else: + suggestion_text = "The query does not involve a full table scan. It appears to be well-optimized for the given conditions." + return Result(self.rule_name, Level.OK, suggestion_text) diff --git a/handler/analyzer/sql/rules/review/is_null.py b/handler/analyzer/sql/rules/review/is_null.py new file mode 100644 index 00000000..3e76c451 --- /dev/null +++ b/handler/analyzer/sql/rules/review/is_null.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/20 +@file: is_null.py +@desc: +""" + +from sqlgpt_parser.parser.tree.literal import NullLiteral +from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor +from handler.analyzer.sql.rules.level import Level +from sqlgpt_parser.parser.tree.statement import Statement +from handler.analyzer.sql.rules.abstract_rule import AbstractRule +from handler.analyzer.sql.rules.result import Result + + +class IsNullRule(AbstractRule): + rule_name = "is_null_rule" + rule_description = """ + Use IS NULL to determine whether it is a NULL value + A direct comparison of NULL to any value is NULL. +  1) The return result of NULL<>NULL is NULL, not false. +  2) The return result of NULL=NULL is NULL, not true. +  3) The return result of NULL<>1 is NULL, not true. + """ + + def match(self, root: Statement, catalog=None) -> bool: + """ + NULL<>、<>NULL、=NULL、NULL= + :param root: + :param catalog: + :return: + """ + + # NULL<>、<>NULL、=NULL、NULL=、!=NULL、 NULL!= + class Visitor(DefaultTraversalVisitor): + def __init__(self): + self.match = False + + def visit_comparison_expression(self, node, context): + if isinstance(node.left, NullLiteral): + self.match = True + if isinstance(node.right, NullLiteral): + self.match = True + return None + + visitor = Visitor() + visitor.process(root, None) + + return visitor.match + + def suggestion(self, root: Statement, catalog=None): + if self.match(root, catalog): + # 如果发现不正确的NULL比较,提供具体的修改建议 + suggestion_text = "Detected comparison with NULL using =, !=, or <>. " "Use 'IS NULL' or 'IS NOT NULL' for correct NULL checks." + return Result(self.rule_name, Level.WARN, suggestion_text) + else: + # 如果没有发现不正确比较,返回OK状态 + return Result(self.rule_name, Level.OK, "No improper NULL comparisons found.") diff --git a/handler/analyzer/sql/rules/review/large_in_clause.py b/handler/analyzer/sql/rules/review/large_in_clause.py new file mode 100644 index 00000000..772d45a0 --- /dev/null +++ b/handler/analyzer/sql/rules/review/large_in_clause.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
diff --git a/handler/analyzer/sql/rules/review/large_in_clause.py b/handler/analyzer/sql/rules/review/large_in_clause.py
new file mode 100644
index 00000000..772d45a0
--- /dev/null
+++ b/handler/analyzer/sql/rules/review/large_in_clause.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+
+"""
+@time: 2024/5/24
+@file: large_in_clause.py
+@desc:
+"""
+from handler.analyzer.sql.rules.level import Level
+from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor
+from handler.analyzer.sql.rules.abstract_rule import AbstractRule
+from handler.analyzer.sql.rules.result import Result
+from sqlgpt_parser.parser.tree.statement import Statement
+
+
+class LargeInClauseAdjustedRule(AbstractRule):
+    rule_name = "large_in_clause_rule_adjusted"
+    rule_description = """
+    Avoid using IN clauses with more than 200 elements as it may lead to performance issues.
+    """
+
+    MAX_IN_ELEMENTS = 200
+
+    def match(self, root: Statement, catalog=None) -> bool:
+        class Visitor(DefaultTraversalVisitor):
+            def __init__(self):
+                self.match = False
+
+            def visit_in_predicate(self, node, context):
+                # node.value_list is expected to hold the IN list; hasattr guards against parser model changes
+                if hasattr(node, 'value_list'):
+                    if len(node.value_list.values) > LargeInClauseAdjustedRule.MAX_IN_ELEMENTS:
+                        self.match = True
+                        return self.match
+                return self.match
+
+        visitor = Visitor()
+        visitor.process(root, None)
+
+        return visitor.match
+
+    def suggestion(self, root: Statement, catalog=None):
+        if self.match(root, catalog):
+            return Result(
+                self.rule_name,
+                Level.WARN,
+                f"The IN clause contains more than {LargeInClauseAdjustedRule.MAX_IN_ELEMENTS} elements, which may degrade query performance. Consider alternative strategies like breaking the query into smaller chunks or using EXISTS/JOIN clauses.",
+            )
+        else:
+            return Result(self.rule_name, Level.OK, "The IN clause does not exceed the recommended number of elements.")
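A sketch of the threshold check; the 201-element IN list is generated the same way test_large_in_clause_rule.py below does it:

    from sqlgpt_parser.parser.oceanbase_parser import parser
    from handler.analyzer.sql.rules.review.large_in_clause import LargeInClauseAdjustedRule

    rule = LargeInClauseAdjustedRule()
    sql = "SELECT * FROM t WHERE id IN (" + ",".join(["?"] * 201) + ")"  # 201 > MAX_IN_ELEMENTS
    print(rule.suggestion(parser.parse(sql), None).level)  # Level.WARN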
diff --git a/handler/analyzer/sql/rules/review/multi_table_join.py b/handler/analyzer/sql/rules/review/multi_table_join.py
new file mode 100644
index 00000000..55daa90f
--- /dev/null
+++ b/handler/analyzer/sql/rules/review/multi_table_join.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/5/20
+@file: multi_table_join.py
+@desc:
+"""
+from handler.analyzer.sql.rules.level import Level
+from sqlgpt_parser.parser.tree.join_criteria import JoinOn, JoinUsing
+from sqlgpt_parser.parser.tree.statement import Statement
+from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor
+from handler.analyzer.sql.rules.abstract_rule import AbstractRule
+from handler.analyzer.sql.rules.result import Result
+
+
+class MultiTableJoinRule(AbstractRule):
+    rule_name = "multi_table_join_rule"
+    rule_description = """
+    Joining more than 5 tables in one query is not recommended.
+    """
+
+    def match(self, root: Statement, catalog=None) -> bool:
+
+        class Visitor(DefaultTraversalVisitor):
+            def __init__(self):
+                self.match = False
+                self.join_count = 0
+
+            def visit_join(self, node, context):
+                self.join_count = self.join_count + 1
+
+                if self.join_count >= 5:
+                    self.match = True
+                else:
+                    self.process(node.left, context)
+                    self.process(node.right, context)
+
+                if isinstance(node.criteria, JoinOn):
+                    self.process(node.criteria.expression, context)
+                elif isinstance(node.criteria, JoinUsing):
+                    self.process(node.criteria.columns, context)
+
+                return None
+
+        visitor = Visitor()
+        visitor.process(root, None)
+
+        return visitor.match
+
+    def get_join_count(self, root: Statement) -> int:
+        """Helper method to count the number of JOIN operations in the statement."""
+
+        class CountJoinVisitor(DefaultTraversalVisitor):
+            def __init__(self):
+                self.join_count = 0
+
+            def visit_join(self, node, context):
+                self.join_count += 1
+                self.process(node.left, context)
+                self.process(node.right, context)
+
+                if isinstance(node.criteria, JoinOn):
+                    self.process(node.criteria.expression, context)
+                elif isinstance(node.criteria, JoinUsing):
+                    self.process(node.criteria.columns, context)
+
+        visitor = CountJoinVisitor()
+        visitor.process(root, None)
+        return visitor.join_count
+
+    def suggestion(self, root: Statement, catalog=None) -> Result:
+        join_count = self.get_join_count(root)
+        if join_count > 5:
+            # More than the recommended number of joined tables; give concrete optimization advice
+            suggestion_text = (
+                f"The query joins {join_count + 1} tables across {join_count} JOIN operations, exceeding the recommended limit of 5.\n"
+                "Consider the following optimizations:\n"
+                "- Break the query into smaller, simpler queries and use application-side processing to combine results.\n"
+                "- Review the schema design; denormalization or indexed views might reduce the need for complex joins.\n"
+                "- Ensure all joined columns are properly indexed for involved tables.\n"
+                "- If applicable, consider using materialized views or caching strategies for frequently accessed subsets of data."
+            )
+            return Result(self.rule_name, Level.WARN, suggestion_text)
+        else:
+            # Within the limit; the query is in the recommended range
+            suggestion_text = "The number of joined tables is within the recommended limit. No further action needed."
+            return Result(self.rule_name, Level.OK, suggestion_text)
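Since get_join_count() counts JOIN nodes (tables joined = joins + 1), a 6-join query trips the WARN branch; an illustrative sketch:

    from sqlgpt_parser.parser.oceanbase_parser import parser
    from handler.analyzer.sql.rules.review.multi_table_join import MultiTableJoinRule

    rule = MultiTableJoinRule()
    sql = ("SELECT * FROM t1 JOIN t2 ON t1.id = t2.id JOIN t3 ON t2.id = t3.id "
           "JOIN t4 ON t3.id = t4.id JOIN t5 ON t4.id = t5.id "
           "JOIN t6 ON t5.id = t6.id JOIN t7 ON t6.id = t7.id")  # 6 joins, 7 tables
    stmt = parser.parse(sql)
    print(rule.get_join_count(stmt))          # 6
    print(rule.suggestion(stmt, None).level)  # Level.WARN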
diff --git a/handler/analyzer/sql/rules/review/select_all.py b/handler/analyzer/sql/rules/review/select_all.py
new file mode 100644
index 00000000..81934ce0
--- /dev/null
+++ b/handler/analyzer/sql/rules/review/select_all.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/5/20
+@file: select_all.py
+@desc:
+"""
+from handler.analyzer.sql.rules.level import Level
+from sqlgpt_parser.parser.tree.statement import Statement
+from sqlgpt_parser.parser.tree.expression import QualifiedNameReference
+from sqlgpt_parser.parser.tree.qualified_name import QualifiedName
+from sqlgpt_parser.parser.tree.select_item import SingleColumn
+from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor
+from handler.analyzer.sql.rules.result import Result
+from handler.analyzer.sql.rules.abstract_rule import AbstractRule
+
+
+class SelectAllRule(AbstractRule):
+    rule_name = "select_all_rule"
+    rule_description = """
+    Avoid SELECT *; list the required columns explicitly.
+    """
+
+    def match(self, root: Statement, catalog=None) -> bool:
+        class Visitor(DefaultTraversalVisitor):
+            def __init__(self):
+                self.is_select_all = False
+
+            def visit_select(self, node, context):
+                for item in node.select_items:
+                    if isinstance(item, SingleColumn) and isinstance(item.expression, QualifiedNameReference) and isinstance(item.expression.name, QualifiedName):
+                        parts = item.expression.name.parts
+                        for part in parts:
+                            if part == '*':
+                                self.is_select_all = True
+                                break
+
+        visitor = Visitor()
+        visitor.process(root, None)
+
+        return visitor.is_select_all
+
+    def suggestion(self, root: Statement, catalog=None) -> Result:
+        if self.match(root, catalog):
+            suggestion_text = "Using 'SELECT *' can lead to unnecessary data retrieval and potentially impact query performance. Consider specifying only the necessary columns explicitly to optimize your query."
+            return Result(self.rule_name, Level.WARN, suggestion_text)
+        else:
+            return Result(self.rule_name, Level.OK, "No 'SELECT *' usage detected, query is optimized for column selection.")
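A minimal sketch of the rule's behavior on the two obvious cases (illustrative only):

    from sqlgpt_parser.parser.oceanbase_parser import parser
    from handler.analyzer.sql.rules.review.select_all import SelectAllRule

    rule = SelectAllRule()
    print(rule.match(parser.parse("SELECT * FROM t"), None))         # True  -> Level.WARN
    print(rule.match(parser.parse("SELECT id, name FROM t"), None))  # False -> Level.OK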
diff --git a/handler/analyzer/sql/rules/review/update_delete_multi_table.py b/handler/analyzer/sql/rules/review/update_delete_multi_table.py
new file mode 100644
index 00000000..e8d9b919
--- /dev/null
+++ b/handler/analyzer/sql/rules/review/update_delete_multi_table.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/5/20
+@file: update_delete_multi_table.py
+@desc:
+"""
+
+from sqlgpt_parser.parser.tree.relation import Join
+from sqlgpt_parser.parser.tree.statement import Statement
+from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor
+from handler.analyzer.sql.rules.level import Level
+from handler.analyzer.sql.rules.abstract_rule import AbstractRule
+from handler.analyzer.sql.rules.result import Result
+
+
+class UpdateDeleteMultiTableRule(AbstractRule):
+    rule_name = "update_delete_multi_table_rule"
+    rule_description = """
+    Multi-table UPDATE / DELETE statements are not recommended.
+    """
+
+    def match(self, root: Statement, catalog=None) -> bool:
+        """
+        :param root:
+        :param catalog:
+        :return:
+        """
+
+        class Visitor(DefaultTraversalVisitor):
+            def __init__(self):
+                self.match = False
+
+            def visit_delete(self, node, context):
+                table = node.table
+                if table and isinstance(table[0], Join):
+                    self.match = True
+
+            def visit_update(self, node, context):
+                table = node.table
+                if table and isinstance(table[0], Join):
+                    self.match = True
+
+        visitor = Visitor()
+        visitor.process(root, None)
+
+        return visitor.match
+
+    def suggestion(self, root: Statement, catalog=None) -> Result:
+        if self.match(root, catalog):
+            suggestion_text = (
+                "The use of multiple tables in an UPDATE or DELETE operation is not recommended. "
+                "Consider breaking down the operation into separate single-table statements or "
+                "using transactions to manage the update/delete across multiple tables safely."
+            )
+            return Result(self.rule_name, Level.WARN, suggestion_text)
+        else:
+            return Result(self.rule_name, Level.OK, "No multi-table UPDATE or DELETE operation detected, following best practices.")
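An illustrative sketch; whether the bundled OceanBase parser accepts MySQL-style multi-table UPDATE syntax is an assumption here, not something this diff demonstrates:

    from sqlgpt_parser.parser.oceanbase_parser import parser
    from handler.analyzer.sql.rules.review.update_delete_multi_table import UpdateDeleteMultiTableRule

    rule = UpdateDeleteMultiTableRule()
    stmt = parser.parse("UPDATE t1 JOIN t2 ON t1.id = t2.id SET t1.a = t2.b")  # assumed to parse to a Join relation
    print(rule.suggestion(stmt, None).level)  # Level.WARN when node.table[0] is a Join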
diff --git a/handler/analyzer/sql/rules/review/update_delete_without_where_or_true_condition.py b/handler/analyzer/sql/rules/review/update_delete_without_where_or_true_condition.py
new file mode 100644
index 00000000..5ef91b91
--- /dev/null
+++ b/handler/analyzer/sql/rules/review/update_delete_without_where_or_true_condition.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/5/20
+@file: update_delete_without_where_or_true_condition.py
+@desc:
+"""
+
+from sqlgpt_parser.parser.tree.statement import Statement
+from sqlgpt_parser.parser.tree.expression import ComparisonExpression
+from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor
+from handler.analyzer.sql.rules.level import Level
+from handler.analyzer.sql.rules.abstract_rule import AbstractRule
+from handler.analyzer.sql.rules.result import Result
+
+
+class UpdateDeleteWithoutWhereOrTrueConditionRule(AbstractRule):
+    rule_name = "update_delete_without_where_or_true_condition_rule"
+    rule_description = """
+    UPDATE or DELETE statements should not be executed without a WHERE clause or with an always-true WHERE condition.
+    """
+
+    def match(self, root: Statement, catalog=None) -> bool:
+        """
+        :param root:
+        :param catalog:
+        :return:
+        """
+
+        class Visitor(DefaultTraversalVisitor):
+            def __init__(self):
+                self.match = False
+                self.visited_where = False
+
+            def visit_update(self, node, context):
+                self.check_for_where(node.where)
+
+            def visit_delete(self, node, context):
+                self.check_for_where(node.where)
+
+            def check_for_where(self, where):
+                if where is None:
+                    # No WHERE clause found
+                    self.match = True
+                elif isinstance(where, ComparisonExpression) and where.left == where.right:
+                    # The WHERE clause compares an expression with itself, i.e. it is always true
+                    self.match = True
+                else:
+                    # Valid WHERE clause found
+                    self.visited_where = True
+
+        visitor = Visitor()
+        visitor.process(root, None)
+
+        # Only consider it a match if no valid WHERE clause was encountered
+        return visitor.match and not visitor.visited_where
+
+    def suggestion(self, root: Statement, catalog=None) -> Result:
+        if self.match(root, catalog):
+            suggestion_text = (
+                "Executing UPDATE or DELETE statements without a WHERE clause or with an always-true WHERE condition "
+                "can be extremely dangerous, potentially affecting all rows in the table. Please ensure a proper and "
+                "specific WHERE condition is used to limit the scope of the operation."
+            )
+            return Result(self.rule_name, Level.CRITICAL, suggestion_text)
+        else:
+            return Result(self.rule_name, Level.OK, "UPDATE or DELETE operations include a WHERE clause with a specific condition, adhering to best practices.")
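A sketch of the severity split (CRITICAL vs OK), using the same call pattern as the other rules; illustrative only:

    from sqlgpt_parser.parser.oceanbase_parser import parser
    from handler.analyzer.sql.rules.review.update_delete_without_where_or_true_condition import UpdateDeleteWithoutWhereOrTrueConditionRule

    rule = UpdateDeleteWithoutWhereOrTrueConditionRule()
    print(rule.suggestion(parser.parse("DELETE FROM t1"), None).level)               # Level.CRITICAL: no WHERE at all
    print(rule.suggestion(parser.parse("DELETE FROM t1 WHERE id = 1"), None).level)  # Level.OK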
diff --git a/handler/analyzer/sql/rules/tunning/__init__.py b/handler/analyzer/sql/rules/tunning/__init__.py
new file mode 100644
index 00000000..69b7c2fa
--- /dev/null
+++ b/handler/analyzer/sql/rules/tunning/__init__.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/5/21
+@file: __init__.py
+@desc:
+"""
diff --git a/handler/analyzer/sql/rules/tunning/index_column_fuzzy_match.py b/handler/analyzer/sql/rules/tunning/index_column_fuzzy_match.py
new file mode 100644
index 00000000..fa04186c
--- /dev/null
+++ b/handler/analyzer/sql/rules/tunning/index_column_fuzzy_match.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/5/21
+@file: index_column_fuzzy_match.py
+@desc:
+"""
+from handler.analyzer.sql.rules.level import Level
+from sqlgpt_parser.parser.tree.expression import QualifiedNameReference
+from sqlgpt_parser.parser.tree.literal import StringLiteral
+from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor
+from handler.analyzer.sql.rules.abstract_rule import AbstractRule
+from handler.analyzer.sql.rules.result import Result
+from sqlgpt_parser.parser.tree.statement import Statement
+
+
+class IndexColumnFuzzyMatchRule(AbstractRule):
+    rule_name = "index_column_fuzzy_match_rule"
+    rule_description = """
+    Avoid using fuzzy or left fuzzy matches on indexed columns in query conditions
+    as it may lead to performance degradation.
+    """
+
+    def match(self, root: Statement, catalog=None) -> bool:
+        class Visitor(DefaultTraversalVisitor):
+            def __init__(self):
+                self.match = False
+                self.fuzzy_matches_on_indexed_columns = []
+
+            def visit_like_predicate(self, node, context):
+                # Indexed columns are identified via the supplied `catalog`
+                if catalog and isinstance(node.value, QualifiedNameReference) and isinstance(node.pattern, StringLiteral):
+                    column_name = str(node.value.name)
+                    pattern = node.pattern.value
+                    if catalog.is_column_indexed(column_name):
+                        if pattern.startswith('%'):  # Left fuzzy match
+                            self.fuzzy_matches_on_indexed_columns.append(column_name)
+                            self.match = True
+                        elif '%' in pattern and not pattern.endswith('%'):  # Fuzzy match, but not left-anchored
+                            self.fuzzy_matches_on_indexed_columns.append(column_name)
+                            self.match = True
+                return self.match
+
+        visitor = Visitor()
+        visitor.process(root, catalog)
+
+        if visitor.match:
+            self._fuzzy_matched_columns = visitor.fuzzy_matches_on_indexed_columns
+        return visitor.match
+
+    def suggestion(self, root: Statement, catalog=None):
+        if hasattr(self, '_fuzzy_matched_columns') and self._fuzzy_matched_columns:
+            column_list = ", ".join(self._fuzzy_matched_columns)
+            detailed_suggestion = f"Avoid using fuzzy or left fuzzy matches on these indexed columns: {column_list}"
+            return Result(self.rule_name, Level.WARN, self.rule_description + "\n" + detailed_suggestion)
+        else:
+            return Result(self.rule_name, Level.OK, "No issues found with indexed column fuzzy matching.")
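The rule only fires when a catalog object can answer is_column_indexed(); a hypothetical stub makes that dependency explicit (illustrative, not a real obdiag API):

    from sqlgpt_parser.parser.oceanbase_parser import parser
    from handler.analyzer.sql.rules.tunning.index_column_fuzzy_match import IndexColumnFuzzyMatchRule

    class StubCatalog:
        # hypothetical stand-in: report every column as indexed
        def is_column_indexed(self, column_name):
            return True

    rule = IndexColumnFuzzyMatchRule()
    stmt = parser.parse("SELECT * FROM users WHERE name LIKE '%abc'")  # left-fuzzy on an "indexed" column
    print(rule.match(stmt, StubCatalog()))  # True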
diff --git a/handler/analyzer/sql/rules/tunning/index_column_implicit_conversion.py b/handler/analyzer/sql/rules/tunning/index_column_implicit_conversion.py
new file mode 100644
index 00000000..e95e79ec
--- /dev/null
+++ b/handler/analyzer/sql/rules/tunning/index_column_implicit_conversion.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/5/23
+@file: index_column_implicit_conversion.py
+@desc:
+"""
+from handler.analyzer.sql.rules.level import Level
+from sqlgpt_parser.parser.tree.expression import QualifiedNameReference
+from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor
+from handler.analyzer.sql.rules.abstract_rule import AbstractRule
+from handler.analyzer.sql.rules.result import Result
+from sqlgpt_parser.parser.tree.statement import Statement
+
+
+class IndexColumnImplicitConversionRule(AbstractRule):
+    rule_name = "index_column_implicit_conversion_rule"
+    rule_description = """
+    Detect potential implicit type conversions on indexed columns in query conditions
+    due to comparison or arithmetic operations with different types, which may degrade index efficiency.
+    """
+
+    def match(self, root: Statement, catalog=None) -> bool:
+        class Visitor(DefaultTraversalVisitor):
+            def __init__(self):
+                self.match = False
+                self.conversion_warnings = []
+
+            def visit_comparison_expression(self, node, context):
+                if catalog:
+                    left_is_col = isinstance(node.left, QualifiedNameReference) and catalog.is_column_indexed(str(node.left.name))
+                    right_is_col = isinstance(node.right, QualifiedNameReference) and catalog.is_column_indexed(str(node.right.name))
+
+                    if left_is_col or right_is_col:
+                        # Check for a type mismatch that could lead to an implicit conversion
+                        if (left_is_col and not isinstance(node.right, type(node.left))) or (right_is_col and not isinstance(node.left, type(node.right))):
+                            col_name = str(node.left.name) if left_is_col else str(node.right.name)
+                            self.conversion_warnings.append(f"Implicit type conversion warning on indexed column '{col_name}'.")
+                            self.match = True
+
+                return self.match
+
+            def visit_arithmetic_binary(self, node, context):
+                if catalog:
+                    for expr in [node.left, node.right]:
+                        if isinstance(expr, QualifiedNameReference) and catalog.is_column_indexed(str(expr.name)):
+                            # If the other side is not the same node type, it might suggest an implicit conversion
+                            if not isinstance(node.left, type(expr)) or not isinstance(node.right, type(expr)):
+                                col_name = str(expr.name)
+                                self.conversion_warnings.append(f"Implicit type conversion warning on indexed column '{col_name}' due to arithmetic operation.")
+                                self.match = True
+
+                return self.match
+
+        visitor = Visitor()
+        visitor.process(root, catalog)
+
+        if visitor.match:
+            self._conversion_warnings_details = visitor.conversion_warnings
+        return visitor.match
+
+    def suggestion(self, root: Statement, catalog=None):
+        if hasattr(self, '_conversion_warnings_details') and self._conversion_warnings_details:
+            issue_list = "\n".join(self._conversion_warnings_details)
+            detailed_suggestion = f"The following indexed columns may be involved in implicit type conversions due to comparison or arithmetic operations:\n{issue_list}\nReview these to ensure optimal index usage."
+            return Result(self.rule_name, Level.WARN, self.rule_description + "\n" + detailed_suggestion)
+        else:
+            return Result(self.rule_name, Level.OK, "No implicit type conversion warnings found for indexed columns.")
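Same catalog dependency as the fuzzy-match rule; with an all-indexed stub, a literal compared against a column reference is flagged by the crude node-type check above (illustrative sketch, stub is hypothetical):

    from sqlgpt_parser.parser.oceanbase_parser import parser
    from handler.analyzer.sql.rules.tunning.index_column_implicit_conversion import IndexColumnImplicitConversionRule

    class StubCatalog:
        # hypothetical stand-in: report every column as indexed
        def is_column_indexed(self, column_name):
            return True

    rule = IndexColumnImplicitConversionRule()
    stmt = parser.parse("SELECT * FROM users WHERE phone = 13800000000")  # literal node type != column reference type
    print(rule.suggestion(stmt, StubCatalog()).level)  # Level.WARN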
diff --git a/handler/checker/check_handler.py b/handler/checker/check_handler.py
index b7fa207a..992318f0 100644
--- a/handler/checker/check_handler.py
+++ b/handler/checker/check_handler.py
@@ -108,7 +108,7 @@ def __init__(self, context, check_target_type="observer"):
                 new_node.append(node)
             self.nodes = new_node
         self.version = get_version(self.nodes, self.check_target_type, self.cluster, self.stdio)
-
+        obConnectorPool = None
         # add OBConnectorPool
         try:
             obConnectorPool = checkOBConnectorPool(context, 3, self.cluster)
@@ -184,8 +184,11 @@ def get_all_tasks(self):
                 if file.endswith('.yaml'):
                     folder_name = os.path.basename(root)
                     task_name = "{}.{}".format(folder_name, file.split('.')[0])
-                    task_data = YamlUtils.read_yaml_data(os.path.join(root, file))
-                    tasks[task_name] = task_data
+                    with open(os.path.join(root, file), 'r', encoding='utf-8') as f:
+                        task_data = yaml.safe_load(f)
+                        if task_data is None:
+                            continue
+                        tasks[task_name] = task_data
         if len(tasks) == 0:
             raise Exception("the len of tasks is 0")
         self.tasks = tasks
@@ -241,6 +244,7 @@ def execute(self):
             self.report.export_report()
         except CheckrReportException as e:
             self.stdio.error("Report error :{0}".format(e))
+            self.stdio.verbose(traceback.format_exc())
         except Exception as e:
             self.stdio.error("Internal error :{0}".format(e))
             self.stdio.verbose(traceback.format_exc())
diff --git a/handler/checker/check_report.py b/handler/checker/check_report.py
index eb69d337..1b0c478e 100644
--- a/handler/checker/check_report.py
+++ b/handler/checker/check_report.py
@@ -83,7 +83,7 @@ def get_report_path(self):

     def export_report_xml(self):
         allMap = self.report_tobeMap()
-        with open(self.report_path + ".xml", 'w', encoding="utf8") as f:
+        with open(self.report_path + ".xml", 'w', encoding='utf-8') as f:
             allreport = {}
             allreport["report"] = allMap
             json_str = json.dumps(allreport)
@@ -93,13 +93,13 @@ def export_report_xml(self):

     def export_report_yaml(self):
         allMap = self.report_tobeMap()
-        with open(self.report_path + ".yaml", 'w', encoding="utf8") as f:
+        with open(self.report_path + ".yaml", 'w', encoding='utf-8') as f:
             yaml.dump(allMap, f)

     def export_report_json(self):
         allMap = self.report_tobeMap()
         self.stdio.verbose("export_report_json allMap: {0}".format(allMap))
-        with open(self.report_path + ".json", 'w', encoding="utf8") as f:
+        with open(self.report_path + ".json", 'w', encoding='utf-8') as f:
             # for python2 and python3
             try:
                 json.dump(allMap, f, ensure_ascii=False)
@@ -167,7 +167,7 @@ def export_report_table(self):
                 report_all_tb.add_row([task.name, "all pass"])
         telemetry.push_check_info(self.report_target, {"fail_cases": list(set(failMap)), "critical_cases": list(set(criticalMap)), "warning_cases": list(set(warningMap))})
-        fp = open(self.report_path + ".table", 'a+', encoding="utf8")
+        fp = open(self.report_path + ".table", 'a+', encoding='utf-8')

         if len(report_fail_tb._rows) != 0:
             self.stdio.verbose(report_fail_tb)
diff --git a/handler/meta/html_meta.py b/handler/meta/html_meta.py
index 302de58c..81371e11 100644
--- a/handler/meta/html_meta.py
+++ b/handler/meta/html_meta.py
@@ -287,3 +287,147 @@ def rm_value(self, key):
     ''',
 )
+
+html_dict.set_value(
+    "sql_review_html_head_template",
+    '''
+    [HTML <head> template markup not recovered from extraction; it renders a report page titled "SQL Review报告".]
+    ''',
+)
+
+html_dict.set_value(
+    "analyze_sql_html_head_template",
+    '''
+    [HTML <head> template markup not recovered from extraction; it renders a report page titled "租户SQL诊断报告" with the heading "SQL 诊断报告".]
+    ''',
+)
+
+html_dict.set_value(
+    "html_footer_temple",
+    '''
+    [HTML footer template markup not recovered from extraction.]
+    ''',
+)
diff --git a/handler/meta/sql_meta.py b/handler/meta/sql_meta.py
index 7538e7cf..922173ac 100644
--- a/handler/meta/sql_meta.py
+++ b/handler/meta/sql_meta.py
@@ -1003,3 +1003,395 @@ def rm_value(self, key):
     ##REPLACE_ORDER_BY##;
     ''',
 )
+
+sql_dict.set_value(
+    "select_all_gv_database_view",
+    '''
+    SELECT /*+ READ_CONSISTENCY(WEAK) QUERY_TIMEOUT(60000000) */
+        tenant_id,
+        tenant_name,
+        database_id,
+        database_name,
+        `comment`,
+        in_recyclebin
+    FROM oceanbase.`gv$database`
+    ''',
+)
+
+sql_dict.set_value(
+    "select_cdb_database",
+    '''
+    SELECT/*+ QUERY_TIMEOUT(10000000) */
+        con_id as tenant_id,
+        object_id as database_id,
+        object_name as database_name
+    FROM oceanbase.cdb_objects
+    where
+        con_id = ##REPLACE_CON_ID##
+        and OBJECT_TYPE = 'DATABASE'
+    ''',
+)
+
+sql_dict.set_value(
+    "select_tenant_cdb_database",
+    '''
+    SELECT/*+ QUERY_TIMEOUT(10000000) */
+        con_id as tenant_id,
+        object_id as database_id,
+        object_name as database_name
+    FROM oceanbase.DBA_OBJECTS
+    where OBJECT_TYPE = 'DATABASE'
+    ''',
+)
+
+sql_dict.set_value(
+    "select_tenant_gv_database_view",
+    '''
+    SELECT /*+ READ_CONSISTENCY(WEAK) QUERY_TIMEOUT(60000000) */
+        tenant_id,
+        tenant_name,
+        database_id,
+        database_name,
+        `comment`,
+        in_recyclebin
+    FROM oceanbase.`gv$database`
+    WHERE tenant_id = ##REPLACE_TENANT_ID##
+    ''',
+)
+
+sql_dict.set_value(
+    "get_table_id",
+    '''
+    select
+        table_id
+    from oceanbase.gv$table
+    where
+        tenant_id = ?
+        and database_id = ##REPLACE_DATABASE_ID##
+        and table_name = '##REPLACE_TABLE_NAME##'
+    limit 1
+    ''',
+)
+
+sql_dict.set_value(
+    "get_table_id_for_ob4",
+    '''
+    select
+        t3.table_id as table_id
+    from (select con_id, owner, table_name, partitioned from oceanbase.CDB_TABLES) t1
+    left join (select con_id, owner, object_name, object_id from oceanbase.CDB_OBJECTS where object_type = 'database') t2 ON t1.con_id = t2.con_id and t1.owner = t2.owner
+    left join (select con_id, owner, object_name, object_id as table_id from oceanbase.CDB_OBJECTS where object_type = 'table') t3 ON t1.con_id = t3.con_id and t1.owner = t3.owner and t1.table_name = t3.object_name
+    where t1.con_id = ##REPLACE_CON_ID## and t2.object_id = ##REPLACE_OBJECT_ID## and t1.table_name = '##REPLACE_TABLE_NAME##' limit 1
+    ''',
+)
+
+sql_dict.set_value(
+    "get_table_index",
+    '''
+select
+    key_name as index_name,
+    group_concat(column_name order by seq_in_index separator ',') as column_name
+    from
+    oceanbase.__tenant_virtual_table_index
+    where
+    table_id = ##REPLACE_TABLE_ID##
+    group by key_name
+    ''',
+)
+
+sql_dict.set_value(
+    "get_database_name",
+    '''
+select
+    database_name
+    from oceanbase.gv$database
+    where
+    tenant_id = ##REPLACE_TENANT_ID##
+    and database_id = ##REPLACE_DATABASE_ID##
+    limit 1
+    ''',
+)
+
+sql_dict.set_value(
+    "get_sql_audit_for_sql_review",
+    '''
+select /*+ READ_CONSISTENCY(WEAK) QUERY_TIMEOUT(120000000) */
+    max(case when length(sql_id) > 0 then svr_ip else 0 end) as svrIp,
+    max(case when length(sql_id) > 0 then svr_port else 0 end) as svrPort,
+    max(case when length(sql_id) > 0 then request_id else 0 end) as requestId,
+    max(case when length(sql_id) > 0 then client_ip else 0 end) as clientIp,
+    max(case when length(sql_id) > 0 then tenant_name else 0 end) as tenantName,
+    max(case when length(sql_id) > 0 then tenant_id else 0 end) as tenantId,
+    max(case when length(sql_id) > 0 then db_name else 0 end) as dbName,
+    max(case when length(sql_id) > 0 then db_id else 0 end) as dbId,
+    max(case when
length(sql_id) > 0 then query_sql else 0 end) as querySql, + max(case when length(sql_id) > 0 then plan_id else 0 end) as planId, + max(case when length(sql_id) > 0 then sql_id else '' end) as sqlId, + max(case when length(sql_id) > 0 then trace_id else '' end) as traceId, + min(request_time) as requestTime, + sum(case when length(sql_id) > 0 then return_rows else 0 end) as returnRows, + sum(case when length(sql_id) > 0 then affected_rows else 0 end) as affectedRows, + sum(partition_cnt) as partitionCount, + sum(case when length(sql_id) > 0 then ret_code else 0 end) as retCode, + sum(case event when 'system internal wait' then WAIT_TIME_MICRO else 0 end) as event0WaitTimeUs, + sum(case event when 'mysql response wait client' then WAIT_TIME_MICRO else 0 end) as event1WaitTimeUs, + sum(case event when 'sync rpc' then WAIT_TIME_MICRO else 0 end) as event2WaitTimeUs, + sum(case event when 'db file data read' then WAIT_TIME_MICRO else 0 end) as event3WaitTimeUs, + sum(total_wait_time_micro) as totalWaitTimeMicro, + sum(total_waits) as totalWaits, + sum(rpc_count) as rpcCount, + sum(case when length(sql_id) > 0 then plan_type else 0 end) as planType, + sum(case when length(sql_id) > 0 then is_inner_sql else 0 end) as isInnerSql, + sum(case when length(sql_id) > 0 then is_executor_rpc else 0 end) as isExecutorRpc, + sum(case when length(sql_id) > 0 then is_hit_plan else 0 end) as isHitPlan, + sum(case when length(sql_id) > 0 then elapsed_time else 0 end) as elapsedTime, + sum(execute_time)-sum(total_wait_time_micro)+sum(get_plan_time) as cpuTime, + sum(net_time) as netTime, + sum(net_wait_time) as netWaitTime, + sum(queue_time) as queueTime, + sum(decode_time) as decodeTime, + sum(get_plan_time) as getPlanTime, + sum(execute_time) as executeTime, + sum(application_wait_time) as applicationWaitTime, + sum(concurrency_wait_time) as concurrencyWaitTime, + sum(user_io_wait_time) as userIoWaitTime, + sum(schedule_time) as scheduleTime, + sum(row_cache_hit) as rowCacheHit, + sum(bloom_filter_cache_hit) as bloomFilterCacheHit, + sum(block_cache_hit) as blockCacheHit, + sum(block_index_cache_hit) as blockIndexCacheHit, + sum(disk_reads) as diskReads, + sum(case when length(sql_id) > 0 then retry_cnt else 0 end) as retryCount, + sum(case when length(sql_id) > 0 then table_scan else 0 end) as tableScan, + sum(case when length(sql_id) > 0 then consistency_level else 0 end) as consistencyLevel, + sum(memstore_read_row_count) as memstoreReadRowCount, + sum(ssstore_read_row_count) as ssstoreReadRowCount + from oceanbase.gv$sql_audit + where request_time >= ##REPLACE_REQUEST_FROM_TIME## + and request_time <= ##REPLACE_REQUEST_TO_TIME## + and length(sql_id) > 0 + and length(query_sql) > 0 + and length(db_name) > 0 + and query_sql not like 'show%' + and query_sql not like 'alter%' + and query_sql not like 'set%' + and query_sql not like 'commit%' + and query_sql not like 'roll%' + and query_sql not like 'begin%' + and query_sql not like 'end%' + and query_sql not like 'drop%' + group by trace_id + having elapsedTime >= ##REPLACE_ELAPSED_TIME## + and length(sqlId) > 0 + limit ##REPLACE_LIMIT## + ''', +) + +sql_dict.set_value( + "get_sql_audit_ob4_for_sql_review", + ''' +select /*+ READ_CONSISTENCY(WEAK) QUERY_TIMEOUT(120000000) */ + max(case when length(sql_id) > 0 then svr_ip else 0 end) as svrIp, + max(case when length(sql_id) > 0 then svr_port else 0 end) as svrPort, + max(case when length(sql_id) > 0 then request_id else 0 end) as requestId, + max(case when length(sql_id) > 0 then client_ip else 0 end) as 
clientIp, + max(case when length(sql_id) > 0 then tenant_name else 0 end) as tenantName, + max(case when length(sql_id) > 0 then tenant_id else 0 end) as tenantId, + max(case when length(sql_id) > 0 then db_name else 0 end) as dbName, + max(case when length(sql_id) > 0 then db_id else 0 end) as dbId, + max(case when length(sql_id) > 0 then query_sql else 0 end) as querySql, + max(case when length(sql_id) > 0 then plan_id else 0 end) as planId, + max(case when length(sql_id) > 0 then sql_id else '' end) as sqlId, + max(case when length(sql_id) > 0 then trace_id else '' end) as traceId, + min(request_time) as requestTime, + sum(case when length(sql_id) > 0 then return_rows else 0 end) as returnRows, + sum(case when length(sql_id) > 0 then affected_rows else 0 end) as affectedRows, + sum(partition_cnt) as partitionCount, + sum(case when length(sql_id) > 0 then ret_code else 0 end) as retCode, + sum(case event when 'system internal wait' then WAIT_TIME_MICRO else 0 end) as event0WaitTimeUs, + sum(case event when 'mysql response wait client' then WAIT_TIME_MICRO else 0 end) as event1WaitTimeUs, + sum(case event when 'sync rpc' then WAIT_TIME_MICRO else 0 end) as event2WaitTimeUs, + sum(case event when 'db file data read' then WAIT_TIME_MICRO else 0 end) as event3WaitTimeUs, + sum(total_wait_time_micro) as totalWaitTimeMicro, + sum(total_waits) as totalWaits, + sum(rpc_count) as rpcCount, + sum(case when length(sql_id) > 0 then plan_type else 0 end) as planType, + sum(case when length(sql_id) > 0 then is_inner_sql else 0 end) as isInnerSql, + sum(case when length(sql_id) > 0 then is_executor_rpc else 0 end) as isExecutorRpc, + sum(case when length(sql_id) > 0 then is_hit_plan else 0 end) as isHitPlan, + max(case when length(sql_id) > 0 then elapsed_time else 0 end) as elapsedTime, + sum(execute_time) - sum(total_wait_time_micro) + max(get_plan_time) as cpuTime, + sum(net_time) as netTime, + sum(net_wait_time) as netWaitTime, + sum(queue_time) as queueTime, + sum(decode_time) as decodeTime, + sum(get_plan_time) as getPlanTime, + sum(execute_time) as executeTime, + sum(application_wait_time) as applicationWaitTime, + sum(concurrency_wait_time) as concurrencyWaitTime, + sum(user_io_wait_time) as userIoWaitTime, + sum(schedule_time) as scheduleTime, + sum(row_cache_hit) as rowCacheHit, + sum(bloom_filter_cache_hit) as bloomFilterCacheHit, + sum(block_cache_hit) as blockCacheHit, + 0 as blockIndexCacheHit, + sum(disk_reads) as diskReads, + sum(case when length(sql_id) > 0 then retry_cnt else 0 end) as retryCount, + sum(case when length(sql_id) > 0 then table_scan else 0 end) as tableScan, + sum(case when length(sql_id) > 0 then consistency_level else 0 end) as consistencyLevel, + sum(memstore_read_row_count) as memstoreReadRowCount, + sum(ssstore_read_row_count) as ssstoreReadRowCount + from oceanbase.gv$ob_sql_audit + where request_time >= ##REPLACE_REQUEST_FROM_TIME## + and request_time <= ##REPLACE_REQUEST_TO_TIME## + and length(sql_id) > 0 + and length(query_sql) > 0 + and length(db_name) > 0 + and query_sql not like 'show%' + and query_sql not like 'alter%' + and query_sql not like 'set%' + and query_sql not like 'commit%' + and query_sql not like 'roll%' + and query_sql not like 'begin%' + and query_sql not like 'end%' + and query_sql not like 'drop%' + and query_sql not like 'commit%' + and query_sql not like 'select 1%' + group by trace_id + having elapsedTime >= ##REPLACE_ELAPSED_TIME## + and length(sqlId) > 0 + limit ##REPLACE_LIMIT## + ''', +) + +sql_dict.set_value( + "get_plan_explains", 
+ ''' +select /*+ READ_CONSISTENCY(WEAK) */ + plan_depth as planDepth, + plan_line_id as planLineId, + operator, name as objectName + from + oceanbase.gv$plan_cache_plan_explain + where + tenant_id = ##REPLACE_TENANT_ID## and ip = '##REPLACE_SVR_IP##' and port = ##REPLACE_SVR_PORT## and plan_id = ##REPLACE_PLAN_ID## + ''', +) + +sql_dict.set_value( + "get_plan_explains_for_ob4", + ''' +select /*+ READ_CONSISTENCY(WEAK) */ + plan_depth as planDepth, + plan_line_id as planLineId, + operator, + name as objectName, + rows, + cost + from + oceanbase.gv$ob_plan_cache_plan_explain + where + tenant_id = ##REPLACE_TENANT_ID## and svr_ip = '##REPLACE_SVR_IP##' and svr_port = ##REPLACE_SVR_PORT## and + plan_id = ##REPLACE_PLAN_ID## + ''', +) + +sql_dict.set_value( + "get_tables", + ''' +select + table_name as tableName, + table_id as tableId + from oceanbase.gv$table + where database_name = '##REPLACE_DATABASE_NAME##' limit 1 + ''', +) + +sql_dict.set_value( + "get_tables_for_ob4", + ''' +select + table_schema databaseName, + table_name tableName + from oceanbase.information_schema.tables + where table_schema = '##REPLACE_DATABASE_NAME##' and table_type='BASE TABLE' limit 1 + ''', +) + +sql_dict.set_value( + "get_tenants", + ''' +select + tenant_name as tenantName, + tenant_id as tenantId + from oceanbase.gv$tenant + ''', +) + +sql_dict.set_value( + "get_colum_list_on_lower_version", + ''' +select /*+ READ_CONSISTENCY(weak),leading(a,b) use_hash(a,b) */ + b.data_type dataType, + a.column_id columnId, + b.column_name columnName , + max(a.num_distinct) ndvCount + FROM + oceanbase.__all_column_statistic a, + oceanbase.__all_column b + WHERE + a.tenant_id=b.tenant_id + and a.table_id=b.table_id + and a.column_id=b.column_id + and b.column_name not like '%__substr%' + and a.tenant_id=? and a.table_id=? + ''', +) + +sql_dict.set_value( + "get_colum_list", + ''' +select /*+ READ_CONSISTENCY(weak),leading(a,b) use_hash(a,b) */ + b.data_type dataType, + a.column_id columnId, + b.column_name columnName, + max(a.num_distinct) ndvCount + FROM + oceanbase.__all_virtual_column_statistic a, + oceanbase.__all_virtual_column b + WHERE + a.tenant_id=b.tenant_id + and a.table_id=b.table_id + and a.column_id=b.column_id + and b.column_name not like '%__substr%' + and a.tenant_id=? and a.table_id=? + GROUP BY b.column_name + ''', +) + +sql_dict.set_value( + "get_column_min_and_max_value_list_on_lower_version", + ''' +select /*+ READ_CONSISTENCY(weak),leading(a,b) use_hash(a,b) */ + column_id columnId, + des_hex_str(case min_value when '19070000FDFFFFFFFFFFFFFFFF01' then '0' else min_value end) `minValue`, + des_hex_str(case max_value when '19070000FEFFFFFFFFFFFFFFFF01' then '0' else max_value end) `maxValue` + from + oceanbase.__all_column_statistic + where tenant_id = ? and table_id = ? + ''', +) + +sql_dict.set_value( + "get_column_min_and_max_value_list", + ''' +select /*+ READ_CONSISTENCY(weak),leading(a,b) use_hash(a,b) */ + column_id columnId, + des_hex_str(case min_value when '19070000FDFFFFFFFFFFFFFFFF01' then '0' else min_value end) `minValue`, + des_hex_str(case max_value when '19070000FEFFFFFFFFFFFFFFFF01' then '0' else max_value end) `maxValue` + from + oceanbase.__all_virtual_column_statistic + where + tenant_id = ? and table_id = ? 
+ ''', +) diff --git a/init_obdiag_cmd.sh b/init_obdiag_cmd.sh index c896ecca..3a9af24d 100644 --- a/init_obdiag_cmd.sh +++ b/init_obdiag_cmd.sh @@ -18,7 +18,7 @@ _obdiag_completion() { fi ;; analyze) - type_list="log flt_trace" + type_list="log flt_trace sql sql_review" ;; rca) type_list="list run" diff --git a/requirements3.txt b/requirements3.txt index 027d73fe..ade61c6b 100644 --- a/requirements3.txt +++ b/requirements3.txt @@ -20,7 +20,7 @@ zstandard==0.17.0 pycryptodome~=3.14.1 pick==1.2.0 PyYAML==6.0 -prettytable==3.5.0 +prettytable==3.10.0 oyaml==1.0 xmltodict==0.13.0 subprocess32==3.5.4 @@ -34,4 +34,5 @@ ruamel.yaml==0.17.4 progressbar==2.5 halo==0.0.31 inspect2==0.1.2 -netifaces==0.11.0 \ No newline at end of file +sqlgpt-parser>=0.0.1a5 +netifaces==0.11.0 diff --git a/telemetry/telemetry.py b/telemetry/telemetry.py index a8a0a236..5c9e2f68 100644 --- a/telemetry/telemetry.py +++ b/telemetry/telemetry.py @@ -154,7 +154,7 @@ def put_data(self): re = {"content": report_data, "component": "obdiag"} # put to /tmp - with open(const.OBDIAG_TELEMETRY_FILE_NAME, 'w', encoding="utf8") as f: + with open(const.OBDIAG_TELEMETRY_FILE_NAME, 'w', encoding="UTF-8") as f: f.write(json.dumps(re, ensure_ascii=False)) self.put_info_to_oceanbase() diff --git a/test/analyzer/log/test_tree.py b/test/analyzer/log/test_tree.py new file mode 100644 index 00000000..f26520a9 --- /dev/null +++ b/test/analyzer/log/test_tree.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+ +""" +@time: 2023/12/09 +@file: test_tree.py +@desc: +""" + +from handler.analyzer.log_parser.tree import Tree + +file_datas = [ + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "1", "name": "open1", "id": "1", "parent_id": "00000000-0000-0000-0000-000000000000", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166233214}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open2", "id": "2", "parent_id": "00000000-0000-0000-0000-000000000000", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166233214}}, + { + 'host_ip': '192.168.1.1', + 'host_type': 'OBSERVER', + 'trace_data': {"trace_id": "x", "name": "open3", "id": "3", "parent_id": "1", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662117166233214, "logs": "test log", "tags": "just a test"}, + }, + { + 'host_ip': '192.168.1.1', + 'host_type': 'OBSERVER', + 'trace_data': { + "trace_id": "x", + "name": "open6", + "id": "6", + "parent_id": "3", + "is_follow": "false", + "start_ts": 1662107166232204, + "end_ts": 1662108166233214, + "logs": "null", + "tags": [{"sql_text": "select * from t where c1=1"}, {"hit_plan": "false"}, {"sql_id": "XXXXXXXXXXXXXXXXXXXXXX"}, {"database_id": 111111111}], + }, + }, + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open7", "id": "7", "parent_id": "6", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166433214, "logs": [{"end_ts": 1662107166433214}]}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBPROXY', 'trace_data': {"trace_id": "x", "name": "open11", "id": "11", "parent_id": "1", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107167233214}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBPROXY', 'trace_data': {"trace_id": "x", "name": "open22", "id": "22", "parent_id": "2", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107173233214}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBPROXY', 'trace_data': {"trace_id": "x", "name": "open12", "id": "12", "parent_id": "1", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662117166233214}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBPROXY', 'trace_data': {"trace_id": "x", "name": "open13", "id": "13", "parent_id": "1", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166233314}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open23", "id": "23", "parent_id": "2", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166233314}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open32", "id": "32", "parent_id": "11", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166235214}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open33", "id": "33", "parent_id": "11", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166283214}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open41", "id": "41", "parent_id": "12", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166293214}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open55", "id": "55", "parent_id": "32", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166291214}}, + {'host_ip': '192.168.1.1', 'host_type': 
'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open56", "id": "56", "parent_id": "32", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107167233214}},
+    {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open66", "id": "66", "parent_id": "41", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107266233214}},
+    {
+        'host_ip': '192.168.1.1',
+        'host_type': 'OBSERVER',
+        'trace_data': {"trace_id": "x", "name": "open67", "id": "67", "parent_id": "999999", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107966233214, "logs": "test log", "tags": "just a test"},
+    },
+]
+
+
+def output(tree):
+    if not tree.nodes:
+        print("The analysis result is empty")
+        return
+    for line in tree.traverse(10, 5):
+        print(line)
+
+
+if __name__ == '__main__':
+    tree = Tree()
+    tree.build(file_datas)
+    tree.traverse(5, 5)
+    output(tree)
diff --git a/test/analyzer/sql/test_arithmetic_rule.py b/test/analyzer/sql/test_arithmetic_rule.py
new file mode 100644
index 00000000..860b6a53
--- /dev/null
+++ b/test/analyzer/sql/test_arithmetic_rule.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/06/05
+@file: test_arithmetic_rule.py
+@desc:
+"""
+import unittest
+from handler.analyzer.sql.rules.review.arithmetic import ArithmeticRule
+from sqlgpt_parser.parser.oceanbase_parser import parser
+from handler.analyzer.sql.rules.level import Level
+
+
+class TestArithmeticRuleWithRealSQL(unittest.TestCase):
+
+    def setUp(self):
+        self.rule = ArithmeticRule()
+        self.parser = parser
+
+    def test_arithmetic_operation_detected(self):
+        # SQL statement containing an arithmetic operation
+        sql_with_arithmetic = "SELECT * FROM table1 WHERE column1 + 1 > 2"
+        parsed_stmt = self.parser.parse(sql_with_arithmetic)
+        result = self.rule.match(parsed_stmt, None)
+        self.assertTrue(result)
+
+    def test_no_arithmetic_operation(self):
+        # SQL statement without arithmetic operations
+        sql_no_arithmetic = "SELECT * FROM table1 WHERE column1 > 2"
+        parsed_stmt = self.parser.parse(sql_no_arithmetic)
+        result = self.rule.match(parsed_stmt, None)
+        self.assertFalse(result)
+
+    def test_suggestion_for_arithmetic_operation(self):
+        sql_with_arithmetic = "SELECT * FROM table1 WHERE column1 + 1 > 2"
+        parsed_stmt = self.parser.parse(sql_with_arithmetic)
+        result = self.rule.suggestion(parsed_stmt, None)
+        self.assertEqual(result.level, Level.NOTICE)
+
+    def test_suggestion_without_arithmetic_operation(self):
+        sql_no_arithmetic = "SELECT * FROM table1 WHERE column1 > 2"
+        parsed_stmt = self.parser.parse(sql_no_arithmetic)
+        result = self.rule.suggestion(parsed_stmt, None)
+        self.assertEqual(result.level, Level.OK)
+
+    def test_complex_arithmetic_operation_detected(self):
+        # Complex SQL with an arithmetic operation nested in a subquery
+        sql_complex = """
+        SELECT t1.id
+        FROM table1 t1
+        JOIN (
+            SELECT id, column1 - column2 + 1 AS derived_col
+            FROM table2
+            WHERE column3 * 2 < 10
+        ) t2 ON t1.id = t2.id
+        WHERE t2.derived_col > 5
+        """
+        parsed_stmt = self.parser.parse(sql_complex)
+        result = self.rule.match(parsed_stmt, None)
+        self.assertTrue(result, "Should detect arithmetic operation in complex SQL statement.")
+
+    def test_complex_no_arithmetic_operation(self):
+        # Complex SQL with JOINs and a subquery but no arithmetic operations
+        sql_complex_no_arithmetic = """
+        SELECT t1.id
+        FROM table1 t1
+        JOIN (
+            SELECT id, column1
+            FROM table2
+            WHERE column3 < 10
+        ) t2 ON t1.id = t2.id
+        WHERE t2.column1 > 5
+        """
+        parsed_stmt = self.parser.parse(sql_complex_no_arithmetic)
+        result = self.rule.match(parsed_stmt, None)
+        self.assertFalse(result, "Should not detect arithmetic operation in complex SQL statement.")
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/analyzer/sql/test_full_scan_rule.py b/test/analyzer/sql/test_full_scan_rule.py
new file mode 100644
index 00000000..32461f4e
--- /dev/null
+++ b/test/analyzer/sql/test_full_scan_rule.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/06/05
+@file: test_full_scan_rule.py
+@desc:
+"""
+import unittest
+from handler.analyzer.sql.rules.review.full_scan import FullScanRule
+from sqlgpt_parser.parser.oceanbase_parser import parser
+from handler.analyzer.sql.rules.level import Level
+
+
+class TestFullScanRule(unittest.TestCase):
+
+    def setUp(self):
+        self.rule = FullScanRule()
+
+    def test_full_scan_with_negation_but_filtered(self):
+        # Sample query with a negated condition; expected to be a full table scan
+        sql_filtered_negation = "SELECT * FROM users WHERE NOT (id BETWEEN 1 AND 10)"
+        parsed_stmt = parser.parse(sql_filtered_negation)
+        print(parsed_stmt)
+        self.assertTrue(self.rule.match(parsed_stmt))
+
+    def test_full_scan_with_like_pattern_full(self):
+        # Sample query using LIKE with a left-fuzzy pattern; expected to be a full table scan
+        sql_like_full = "SELECT * FROM users WHERE username LIKE '%zhangsan'"
+        parsed_stmt = parser.parse(sql_like_full)
+        print(parsed_stmt)
+        self.assertTrue(self.rule.match(parsed_stmt))
+        # suggestion = self.rule.suggestion(parsed_stmt)
+        # self.assertEqual(suggestion.level, Level.WARN)
+
+    def test_not_like_doesnt_hide_full_scan(self):
+        # Sample query using NOT LIKE; the pattern explicitly narrows the range and shrinks the result set
+        sql_not_like = "SELECT * FROM products WHERE name NOT LIKE '%apple%'"
+        parsed_stmt = parser.parse(sql_not_like)
+        print(parsed_stmt)
+        self.assertFalse(self.rule.match(parsed_stmt))
+
+    def test_not_in_doesnt_hide_full_scan(self):
+        # Sample query using NOT IN; may turn out to be a full table scan
+        sql_not_in = "SELECT * FROM orders WHERE customerId NOT IN (SELECT customerId FROM active_customers)"
+        parsed_stmt = parser.parse(sql_not_in)
+        self.assertFalse(self.rule.match(parsed_stmt))
+
+    def test_not_exists_doesnt_hide_full_scan(self):
+        # Sample query using NOT EXISTS; may be a full table scan, the subquery's impact needs evaluation
+        sql_not_exists = "SELECT * FROM orders O WHERE NOT EXISTS (SELECT 1 FROM shipped_orders S WHERE O.orderId = S.orderId)"
+        parsed_stmt = parser.parse(sql_not_exists)
+        print(parsed_stmt)
+        self.assertTrue(self.rule.match(parsed_stmt))
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.WARN)
+
+    def test_not_equal_doesnt_hide_full_scan(self):
+        # Sample query using !=; likely a full table scan unless the filtered value range is very narrow
+        sql_not_equal = "SELECT * FROM inventory WHERE stockLevel != 0"
+        parsed_stmt = parser.parse(sql_not_equal)
+        print(parsed_stmt)
+        self.assertTrue(self.rule.match(parsed_stmt))
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.WARN)
+
+    def test_optimized_not_conditions(self):
+        # Sample query using a NOT condition
+        sql_optimized_not = "SELECT * FROM users WHERE age NOT BETWEEN 18 AND 25"
+        parsed_stmt = parser.parse(sql_optimized_not)
+        self.assertTrue(self.rule.match(parsed_stmt))
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.WARN)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/analyzer/sql/test_is_null_rule.py b/test/analyzer/sql/test_is_null_rule.py
new file mode 100644
index 00000000..a23627c5
--- /dev/null
+++ b/test/analyzer/sql/test_is_null_rule.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/06/05
+@file: test_is_null_rule.py
+@desc:
+"""
+import unittest
+from handler.analyzer.sql.rules.review.is_null import IsNullRule
+from sqlgpt_parser.parser.oceanbase_parser import parser
+from handler.analyzer.sql.rules.level import Level
+
+
+class TestIsNullRule(unittest.TestCase):
+
+    def setUp(self):
+        self.rule = IsNullRule()
+
+    def test_improper_null_comparison(self):
+        # Improper NULL comparisons
+        sqls = ["SELECT * FROM table1 WHERE column1 = NULL", "SELECT * FROM table1 WHERE column1 <> NULL", "SELECT * FROM table1 WHERE NULL = column1", "SELECT * FROM table1 WHERE NULL <> column1"]
+
+        for sql in sqls:
+            parsed_stmt = parser.parse(sql)
+            self.assertTrue(self.rule.match(parsed_stmt), f"Expected to match for SQL: {sql}")
+            suggestion = self.rule.suggestion(parsed_stmt)
+            self.assertEqual(suggestion.level, Level.WARN)
+
+    def test_proper_null_check(self):
+        # Proper NULL checks
+        proper_sqls = ["SELECT * FROM table1 WHERE column1 IS NULL", "SELECT * FROM table1 WHERE column1 IS NOT NULL"]
+
+        for sql in proper_sqls:
+            parsed_stmt = parser.parse(sql)
+            self.assertFalse(self.rule.match(parsed_stmt), f"Should not match for SQL: {sql}")
+            suggestion = self.rule.suggestion(parsed_stmt)
+            self.assertEqual(suggestion.level, Level.OK)
+
+    def test_mixed_query(self):
+        # Mix of proper and improper NULL comparisons
+        sql = "SELECT * FROM table1 WHERE column1 IS NULL OR column2 = NULL"
+        parsed_stmt = parser.parse(sql)
+        self.assertTrue(self.rule.match(parsed_stmt), "Expected to match due to improper NULL comparison")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.WARN)
+
+
+if __name__ == '__main__':
+    unittest.main()
+class TestLargeInClauseAdjustedRule(unittest.TestCase):
+
+    def setUp(self):
+        self.rule = LargeInClauseAdjustedRule()
+
+    def test_large_in_clause(self):
+        # Build a SQL statement whose IN clause holds more than 200 elements
+        large_in_clause_sql = "SELECT * FROM table1 WHERE id IN (" + ','.join(['?'] * 201) + ")"
+        parsed_stmt = parser.parse(large_in_clause_sql)
+
+        self.assertTrue(self.rule.match(parsed_stmt), "Expected to match for SQL with over 200 IN elements")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.WARN)
+
+    def test_small_in_clause(self):
+        # Build a SQL statement whose IN clause holds fewer than 200 elements
+        small_in_clause_sql = "SELECT * FROM table1 WHERE id IN (" + ','.join(['?'] * 199) + ")"
+        parsed_stmt = parser.parse(small_in_clause_sql)
+
+        self.assertFalse(self.rule.match(parsed_stmt), "Should not match for SQL within the limit of 200 IN elements")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_no_in_clause(self):
+        # Build a SQL statement without an IN clause
+        no_in_clause_sql = "SELECT * FROM table1 WHERE column = 'value'"
+        parsed_stmt = parser.parse(no_in_clause_sql)
+
+        self.assertFalse(self.rule.match(parsed_stmt), "Should not match for SQL without an IN clause")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/analyzer/sql/test_multi_table_join_rule.py b/test/analyzer/sql/test_multi_table_join_rule.py
new file mode 100644
index 00000000..d758620e
--- /dev/null
+++ b/test/analyzer/sql/test_multi_table_join_rule.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/06/05
+@file: test_multi_table_join_rule.py
+@desc:
+"""
+import unittest
+from handler.analyzer.sql.rules.review.multi_table_join import MultiTableJoinRule
+from sqlgpt_parser.parser.oceanbase_parser import parser
+from handler.analyzer.sql.rules.level import Level
+
+
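+# Editor's note: the threshold exercised below appears to be five joins. Six
+# explicit JOINs are flagged and two are not; the two complex cases leave it
+# open whether joins inside derived tables always count toward the limit, so
+# see their inline comments.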
+ +""" +@time: 2024/06/05 +@file: test_multi_table_join_rule.py +@desc: +""" +import unittest +from handler.analyzer.sql.rules.review.multi_table_join import MultiTableJoinRule +from sqlgpt_parser.parser.oceanbase_parser import parser +from handler.analyzer.sql.rules.level import Level + + +class TestMultiTableJoinRule(unittest.TestCase): + + def setUp(self): + self.rule = MultiTableJoinRule() + + def test_excessive_joins_detected(self): + # 假设这个SQL有超过5个JOIN + sql_with_excessive_joins = """ + SELECT * + FROM table1 + JOIN table2 ON table1.id = table2.table1_id + JOIN table3 ON table2.id = table3.table2_id + JOIN table4 ON table3.id = table4.table3_id + JOIN table5 ON table4.id = table5.table4_id + JOIN table6 ON table5.id = table6.table5_id + JOIN table7 ON table6.id = table7.table6_id + """ + parsed_stmt = parser.parse(sql_with_excessive_joins) + result = self.rule.match(parsed_stmt) + self.assertTrue(result, "Should detect excessive joins in SQL statement.") + + def test_no_excessive_joins(self): + # 正常SQL,少于等于5个JOIN + sql_no_excessive_joins = """ + SELECT * + FROM table1 + JOIN table2 ON table1.id = table2.table1_id + JOIN table3 ON table2.id = table3.table2_id + """ + parsed_stmt = parser.parse(sql_no_excessive_joins) + result = self.rule.match(parsed_stmt) + self.assertFalse(result, "Should not detect excessive joins in SQL statement.") + + def test_complex_query_with_subqueries_no_excessive_joins(self): + # Complex query with subqueries but not exceeding join limit (e.g., 7 tables but only 4 joins) + sql_complex = """ + SELECT t1.*, t2.col + FROM table1 t1 + JOIN ( + SELECT t2.id, t3.col + FROM table2 t2 + JOIN table3 t3 ON t2.id = t3.table2_id + WHERE t3.col IN (SELECT col FROM table4 WHERE condition) + ) t2 ON t1.id = t2.id + JOIN table5 t5 ON t1.id = t5.table1_id + JOIN table6 t6 ON t5.id = t6.table5_id; + """ + parsed_stmt = parser.parse(sql_complex) + self.assertFalse(self.rule.match(parsed_stmt)) # Assuming subqueries don't increment join count + suggestion = self.rule.suggestion(parsed_stmt) + self.assertEqual(suggestion.level, Level.OK) + + def test_complex_query_with_excessive_joins_and_subqueries(self): + # Complex query exceeding join limit due to multiple explicit joins and possibly join in subqueries + sql_complex_excessive = """ + SELECT t1.*, t2.col + FROM table1 t1 + JOIN table2 t2 ON t1.id = t2.table1_id + JOIN table3 t3 ON t2.id = t3.table2_id + JOIN table4 t4 ON t3.id = t4.table3_id + JOIN table5 t5 ON t4.id = t5.table4_id + JOIN ( + SELECT t6.id, t7.col + FROM table6 t6 + JOIN table7 t7 ON t6.id = t7.table6_id + ) subquery ON t5.id = subquery.id; + """ + parsed_stmt = parser.parse(sql_complex_excessive) + self.assertTrue(self.rule.match(parsed_stmt)) + suggestion = self.rule.suggestion(parsed_stmt) + self.assertEqual(suggestion.level, Level.WARN) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/analyzer/sql/test_parse.py b/test/analyzer/sql/test_parse.py new file mode 100644 index 00000000..6889b566 --- /dev/null +++ b/test/analyzer/sql/test_parse.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
+class MyTestCase(unittest.TestCase):
+    def test_create_table(self):
+        sql = """
+        CREATE TABLE tbl1 (c1 INT, c2 VARCHAR(50))
+        """
+        statement = "{'type': 'create_table', " "'table_name': 'tbl1', " "'element_list': [('c1', FieldType(), False), ('c2', FieldType(), False)]}"
+        result = parser.parse(sql)
+        self.assertEqual(str(result), statement)
+        self.assertEqual(result["type"], "create_table")
+        self.assertEqual(result["table_name"], "tbl1")
+        self.assertEqual(str(result["element_list"][0][1]), "INT")
+
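+    # Editor's note: the production-style DDL below (apparently a historical
+    # sql_audit statistics table) mainly exercises parser robustness on a wide
+    # table with column comments; the assertion only checks that all three
+    # indexes, the primary key plus two secondary keys, are recognized.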
event when \"db file data read\" then wait_time_micro else 0 end))', + `event_4_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)\"\"等待事件的累计时间(微秒)(sum(case event when \"\" then wait_time_micro else 0 end))', + `event_5_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)\"\"等待事件的累计时间(微秒)(sum(case event when \"\" then wait_time_micro else 0 end))', + `total_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)总等待时间(微秒)', + `total_waits` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)总等待次数', + `rpc_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)发送RPC次数', + `plan_type_local_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)本地执行计划次数(sum(case plan_type when 1 then 1 else 0 end))', + `plan_type_remote_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)远程执行计划次数(sum(case plan_type when 2 then 1 else 0 end))', + `plan_type_dist_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)分布式执行计划次数(sum(case plan_type when 3 then 1 else 0 end))', + `inner_sql_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)内部SQL次数(count is_inner_sql=1)', + `executor_rpc_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)执行RPC请求次数', + `miss_plan_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)未命中计划缓存的次数', + `elapsed_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)响应时间 —— 接受到请求到执行结束的总时间(微秒)', + `max_elapsed_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间忙内的最大值)最大响应时间', + `net_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)客户端请求传输到Observer的时间(微秒)', + `net_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)请求在Observer从网络进入队列的时间(微秒)s', + `queue_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)请求在队列中的等待时间(微秒)', + `decode_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)语法解析时间(微秒)', + `get_plan_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)生成计划的时间(微秒)', + `execute_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)执行计划的时间(微秒)', + `cpu_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)CPU时间(微秒)(execute_time+get_plan_time-total_wait_time_micro)', + `max_cpu_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的最大值)最大CPU时间(微秒)', + `application_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)所有Application类事件的总时间(微秒)', + `concurrency_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)所有Concurrency类事件的总时间(微秒)', + `user_io_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)所有UserIO类事件的总时间(微秒)', + `schedule_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)所有Schedule类事件的总时间(微秒)', + `row_cache_hit` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)Row Cache命中次数', + `bloom_filter_cache_hit` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)Bloom Filter Cache命中次数', + `block_cache_hit` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)Block Cache命中次数', + `block_index_cache_hit` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)Block Index Cache命中次数', + `disk_reads` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)磁盘读次数', + `retry_cnt` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)重试次数', + `table_scan` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)表扫描次数', + `consistency_level_strong` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)强一致性级别次数(sum(case consistency_level when 3 then 1 else 0 end))', + `consistency_level_weak` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)弱一致性级别次数(sum(case consistency_level when 2 then 1 else 0 end))', + `memstore_read_row_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)Memstore读行数', + `ssstore_read_row_count` bigint(20) NOT NULL DEFAULT '0' 
+    def test_select(self):
+        sql = """
+        SELECT * FROM T1
+        """
+        statement = '''{'line': 0, 'pos': 0, 'query_body': QuerySpecification(select=Select(distinct=False, select_items=[SingleColumn(expression=QualifiedNameReference(name=QualifiedName.of("*")))]), from_=Table(name=QualifiedName.of("T1"), for_update=False), order_by=[], limit=0, offset=0, for_update=False, nowait_or_wait=False), 'order_by': [], 'limit': 0, 'offset': 0}'''
+        result = parser.parse(sql)
+        self.assertEqual(str(result), statement)
+
+        sql = """
+        SELECT t1 FROM T1
+        """
+        statement = '''{'line': 0, 'pos': 0, 'query_body': QuerySpecification(select=Select(distinct=False, select_items=[SingleColumn(alias=[], expression=QualifiedNameReference(name=QualifiedName.of("t1")))]), from_=Table(name=QualifiedName.of("T1"), for_update=False), order_by=[], limit=0, offset=0, for_update=False, nowait_or_wait=False), 'order_by': [], 'limit': 0, 'offset': 0}'''
+        result = parser.parse(sql)
+        self.assertEqual(str(result), statement)
+
+        sql = """
+        SELECT t1 FROM T1 where t1 > 12
+        """
+        statement = '''{'line': 0, 'pos': 0, 'query_body': QuerySpecification(select=Select(distinct=False, select_items=[SingleColumn(alias=[], expression=QualifiedNameReference(name=QualifiedName.of("t1")))]), from_=Table(name=QualifiedName.of("T1"), for_update=False), where=ComparisonExpression(type='>', left=QualifiedNameReference(name=QualifiedName.of("t1")), right=LongLiteral(value=12)), order_by=[], limit=0, offset=0, for_update=False, nowait_or_wait=False), 'order_by': [], 'limit': 0, 'offset': 0}'''
+        result = parser.parse(sql)
+        print(result)
+        self.assertEqual(str(result), statement)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/analyzer/sql/test_sellect_all_rule.py b/test/analyzer/sql/test_sellect_all_rule.py
new file mode 100644
index 00000000..2a3f1468
--- /dev/null
+++ b/test/analyzer/sql/test_sellect_all_rule.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/06/05
+@file: test_sellect_all_rule.py
+@desc:
+"""
+import unittest
+from sqlgpt_parser.parser.oceanbase_parser import parser
+from handler.analyzer.sql.rules.review.select_all import SelectAllRule
+
+
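+# Editor's note: SelectAllRule evidently targets a bare `*` in the projection
+# list. COUNT(*) is an aggregate argument rather than a projection, so it is
+# expected not to match, as test_select_all_rule_false_1 confirms.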
+class TestSelectAllCase(unittest.TestCase):
+    def test_select_all_rule_true(self):
+        statement = parser.parse("SELECT * FROM T1")
+        result_match = SelectAllRule().match(statement)
+        self.assertTrue(result_match)
+        result_suggestion = SelectAllRule().suggestion(statement)
+        print(result_suggestion)
+        # self.assertIsNotNone(result_suggestion)
+
+    def test_select_all_rule_false(self):
+        statement = parser.parse("SELECT 1 FROM T1")
+        result_match = SelectAllRule().match(statement)
+        self.assertFalse(result_match)
+        result_suggestion = SelectAllRule().suggestion(statement)
+        self.assertIsNotNone(result_suggestion)
+
+    def test_select_all_rule_false_1(self):
+        statement = parser.parse("SELECT count(*) FROM T1")
+        result_match = SelectAllRule().match(statement)
+        self.assertFalse(result_match)
+        result_suggestion = SelectAllRule().suggestion(statement)
+        self.assertIsNotNone(result_suggestion)
+
+    def test_select_all_rule_true_1(self):
+        sql = '''
+        SELECT *
+        FROM Employees e
+        JOIN Departments d ON e.DepartmentID = d.DepartmentID
+        LEFT JOIN (
+            SELECT EmployeeID, ProjectID, COUNT(*) AS NumberOfProjects
+            FROM Projects_Employees_Pivot
+            GROUP BY EmployeeID, ProjectID
+        ) pe ON e.EmployeeID = pe.EmployeeID
+        WHERE d.DepartmentName = 'Sales'
+        ORDER BY e.EmployeeName
+        '''
+        statement = parser.parse(sql)
+        result_match = SelectAllRule().match(statement)
+        self.assertTrue(result_match)
+        result_suggestion = SelectAllRule().suggestion(statement)
+        self.assertIsNotNone(result_suggestion)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/analyzer/sql/test_update_delete_multi_table_rule.py b/test/analyzer/sql/test_update_delete_multi_table_rule.py
new file mode 100644
index 00000000..1e044492
--- /dev/null
+++ b/test/analyzer/sql/test_update_delete_multi_table_rule.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/06/05
+@file: test_update_delete_multi_table_rule.py
+@desc:
+"""
+import unittest
+from handler.analyzer.sql.rules.review.update_delete_multi_table import UpdateDeleteMultiTableRule
+from sqlgpt_parser.parser.oceanbase_parser import parser
+from handler.analyzer.sql.rules.level import Level
+
+
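+# Editor's note: multi-table UPDATE/DELETE statements are flagged at WARN
+# rather than CRITICAL here, presumably because they are legal but their
+# affected row set is harder to reason about than a single-table write.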
+ +""" +@time: 2024/06/05 +@file: test_update_delete_multi_table_rule.py +@desc: +""" +import unittest +from handler.analyzer.sql.rules.review.update_delete_multi_table import UpdateDeleteMultiTableRule +from sqlgpt_parser.parser.oceanbase_parser import parser +from handler.analyzer.sql.rules.level import Level + + +class TestUpdateDeleteMultiTableRule(unittest.TestCase): + + def setUp(self): + self.rule = UpdateDeleteMultiTableRule() + + def test_update_multi_table_detected(self): + # 假设这个SQL包含了多表更新 + sql_with_multi_table_update = """ + UPDATE table1 + INNER JOIN table2 ON table1.id = table2.table1_id + SET table1.column = 'new_value' + """ + parsed_stmt = parser.parse(sql_with_multi_table_update) + result = self.rule.match(parsed_stmt) + self.assertTrue(result, "Should detect multi-table UPDATE operation.") + suggestion = self.rule.suggestion(parsed_stmt) + self.assertEqual(suggestion.level, Level.WARN) + + def test_delete_multi_table_detected(self): + # 假设这个SQL包含了多表删除 + sql_with_multi_table_delete = """ + DELETE table1 + FROM table1 + INNER JOIN table2 ON table1.id = table2.table1_id + """ + parsed_stmt = parser.parse(sql_with_multi_table_delete) + result = self.rule.match(parsed_stmt) + self.assertTrue(result, "Should detect multi-table DELETE operation.") + suggestion = self.rule.suggestion(parsed_stmt) + self.assertEqual(suggestion.level, Level.WARN) + + def test_delete_with_subquery_and_join(self): + """测试包含子查询和联接的多表删除""" + complex_delete_sql = """ + DELETE table1 + FROM table1 + INNER JOIN ( + SELECT table1_id + FROM table2 + WHERE some_column = 'some_value' + GROUP BY table1_id + HAVING COUNT(*) > 1 + ) subquery ON table1.id = subquery.table1_id + """ + parsed_stmt = parser.parse(complex_delete_sql) + self.assertTrue(self.rule.match(parsed_stmt), "Should detect complex multi-table DELETE operation.") + suggestion = self.rule.suggestion(parsed_stmt) + self.assertEqual(suggestion.level, Level.WARN) + + def test_single_table_operation(self): + # 单表更新操作,应不触发警告 + sql_single_table_update = "UPDATE table1 SET column = 'value' WHERE id = 1" + parsed_stmt = parser.parse(sql_single_table_update) + result = self.rule.match(parsed_stmt) + self.assertFalse(result, "Should not detect single-table UPDATE operation as an issue.") + suggestion = self.rule.suggestion(parsed_stmt) + self.assertEqual(suggestion.level, Level.OK) + + # 单表删除操作,同样不应触发警告 + sql_single_table_delete = "DELETE FROM table1 WHERE id = 1" + parsed_stmt = parser.parse(sql_single_table_delete) + result = self.rule.match(parsed_stmt) + self.assertFalse(result, "Should not detect single-table DELETE operation as an issue.") + suggestion = self.rule.suggestion(parsed_stmt) + self.assertEqual(suggestion.level, Level.OK) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/analyzer/sql/test_update_delete_without_where_or_true_condition_rule.py b/test/analyzer/sql/test_update_delete_without_where_or_true_condition_rule.py new file mode 100644 index 00000000..97c9c0d2 --- /dev/null +++ b/test/analyzer/sql/test_update_delete_without_where_or_true_condition_rule.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
+    def test_update_with_always_true_where(self):
+        sql_always_true_update = "UPDATE table1 SET column1 = 'new_value' WHERE 1 = 1"
+        parsed_stmt = parser.parse(sql_always_true_update)
+        result = self.rule.match(parsed_stmt)
+        self.assertTrue(result, "Should detect UPDATE with always-true WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.CRITICAL)
+
+    def test_delete_with_always_true_where(self):
+        sql_always_true_delete = "DELETE FROM table1 WHERE 1 = 1"
+        parsed_stmt = parser.parse(sql_always_true_delete)
+        result = self.rule.match(parsed_stmt)
+        self.assertTrue(result, "Should detect DELETE with always-true WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.CRITICAL)
+
+    def test_valid_update_with_where(self):
+        sql_valid_update = "UPDATE table1 SET column1 = 'new_value' WHERE id = 1"
+        parsed_stmt = parser.parse(sql_valid_update)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not detect a valid UPDATE with WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_valid_delete_with_where(self):
+        sql_valid_delete = "DELETE FROM table1 WHERE id = 1"
+        parsed_stmt = parser.parse(sql_valid_delete)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not detect a valid DELETE with WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_update_with_nested_subquery(self):
+        # The UPDATE uses a nested subquery but still has a valid WHERE condition
+        sql_nested_update = """
+        UPDATE table1
+        SET column = (SELECT MAX(sub_col) FROM table2 WHERE table2.id = table1.id)
+        WHERE EXISTS(SELECT 1 FROM table3 WHERE table3.table1_id = table1.id)
+        """
+        parsed_stmt = parser.parse(sql_nested_update)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not flag an UPDATE with a nested subquery and a valid WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_delete_with_function_in_where(self):
+        # The DELETE's WHERE clause uses a function, but it is not an always-true condition
+        sql_function_delete = "DELETE FROM table1 WHERE DATE(column) = CURDATE()"
+        parsed_stmt = parser.parse(sql_function_delete)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not flag a DELETE with a function in WHERE clause that's not always true.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_complex_delete_with_multi_level_joins(self):
+        # A complex DELETE with multi-level JOINs, constrained by a reasonable WHERE condition
+        sql_complex_delete = """
+        DELETE t1
+        FROM table1 t1
+        JOIN table2 t2 ON t1.id = t2.t1_id
+        JOIN table3 t3 ON t2.id = t3.t2_id
+        WHERE t3.status = 'active'
+        """
+        parsed_stmt = parser.parse(sql_complex_delete)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not flag a DELETE with multi-level JOINs and a specific WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_update_with_case_expression(self):
+        # The UPDATE sets a column value via a CASE expression and also has a WHERE condition
+        sql_case_update = """
+        UPDATE table1
+        SET column = CASE WHEN column2 = 'value' THEN 'new_val' ELSE column END
+        WHERE column3 IS NOT NULL
+        """
+        parsed_stmt = parser.parse(sql_case_update)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not flag an UPDATE using CASE expression and a WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
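+    # Editor's note: a constant-false predicate (`1 = 0`) writes nothing, so
+    # the rule leaves it alone; only always-true filters are treated as dangerous.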
+    def test_delete_with_false_condition(self):
+        # The DELETE has a WHERE condition that is always false
+        sql_false_delete = "DELETE FROM table1 WHERE 1 = 0"
+        parsed_stmt = parser.parse(sql_false_delete)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not flag a DELETE with a never-true WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_update_with_multiple_conditions(self):
+        # The UPDATE has multiple conditions combined with AND/OR
+        sql_multiple_conditions_update = """
+        UPDATE table1
+        SET column = 'new_value'
+        WHERE column1 = 'value1' AND column2 = 'value2' OR column3 IN ('value3', 'value4')
+        """
+        parsed_stmt = parser.parse(sql_multiple_conditions_update)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not flag an UPDATE with multiple, combined WHERE conditions.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+
+if __name__ == '__main__':
+    unittest.main()