Skip to content

Commit

Permalink
2.5.0 gather redact (#446)
Browse files Browse the repository at this point in the history
* support redact

* support redact

* support redact

* build test package

* build test package

* build test package

* build test package

* build test package

* build test package

* delete test package

* delete test package

* delete test package

* format
  • Loading branch information
wayyoungboy authored Sep 25, 2024
1 parent c36da7e commit 5c09dff
Show file tree
Hide file tree
Showing 10 changed files with 252 additions and 5 deletions.
49 changes: 49 additions & 0 deletions common/import_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*
# Copyright (c) 2022 OceanBase
# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
# http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.

"""
@time: 2024/09/19
@file: import_modules.py
@desc:
"""
import os

from common.tool import DynamicLoading


class ImportModulesException(Exception):
pass


# 实现模块导入,要求module_name为模块名和需要导入的对象名,module_file_path为模块文件路径


def import_modules(module_file_dir, stdio):
stdio.verbose("import_modules input: module_file_dir->{0}".format(module_file_dir))
try:
module_files = []
module_list = {}
for root, dirs, files in os.walk(module_file_dir):
if root == module_file_dir:
module_files = files
for module_file in module_files:
module_name = os.path.basename(module_file)[:-3]
DynamicLoading.add_lib_path(module_file_dir)
module = DynamicLoading.import_module(os.path.basename(module_file)[:-3], None)
if not hasattr(module, module_name):
stdio.error("{0} import_module failed".format(module_name))
continue
module_list[module_name] = getattr(module, module_name)
return module_list
except Exception as e:
stdio.error("import_modules failed: {0}".format(e))
raise ImportModulesException("import_modules failed: {0}".format(e))
1 change: 1 addition & 0 deletions conf/inner_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,6 @@ check:
tasks_base_path: "~/.obdiag/check/tasks/"
gather:
scenes_base_path: "~/.obdiag/gather/tasks"
redact_processing_num: 3
rca:
result_path: "./rca/"
4 changes: 1 addition & 3 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,7 @@
'package_file': '~/.obdiag/check/check_package.yaml',
'tasks_base_path': '~/.obdiag/check/tasks/',
},
'gather': {
'scenes_base_path': '~/.obdiag/gather/tasks',
},
'gather': {'scenes_base_path': '~/.obdiag/gather/tasks', 'redact_processing_num': 3},
'rca': {
'result_path': './rca/',
},
Expand Down
2 changes: 2 additions & 0 deletions diag_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ def __init__(self):
self.parser.add_option('--temp_dir', type='string', help='the dir for temporarily storing files on nodes', default='/tmp')
self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml'))
self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value')
self.parser.add_option('--redact', type='string', help='desensitization options', default='')

def init(self, cmd, args):
super(ObdiagGatherLogCommand, self).init(cmd, args)
Expand Down Expand Up @@ -681,6 +682,7 @@ def __init__(self):
self.parser.add_option('--skip_type', type='string', help='The types of gather to be skipped, choices=[ssh, sql]')
self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml'))
self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value')
self.parser.add_option('--redact', type='string', help='desensitization options', default='')

def init(self, cmd, args):
super(ObdiagGatherSceneRunCommand, self).init(cmd, args)
Expand Down
23 changes: 23 additions & 0 deletions handler/gather/gather_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,15 @@
from common.tool import DirectoryUtil
from common.tool import FileUtil
from common.tool import NetUtils
from handler.gather.plugins.redact import Redact
from result_type import ObdiagResult


class GatherLogHandler(BaseShellHandler):
def __init__(self, context, gather_pack_dir='./', is_scene=False):
super(GatherLogHandler, self).__init__()
self.redact_dir = None
self.redact = []
self.pack_dir_this_command = ""
self.context = context
self.stdio = context.stdio
Expand Down Expand Up @@ -80,6 +83,7 @@ def init_option(self):
scope_option = Util.get_option(options, 'scope')
encrypt_option = Util.get_option(options, 'encrypt')
temp_dir_option = Util.get_option(options, 'temp_dir')
redact_option = Util.get_option(options, 'redact')
if self.context.get_variable("gather_from", None):
from_option = self.context.get_variable("gather_from")
if self.context.get_variable("gather_to", None):
Expand Down Expand Up @@ -133,6 +137,12 @@ def init_option(self):
self.grep_options = grep_option
if temp_dir_option:
self.gather_ob_log_temporary_dir = temp_dir_option
if redact_option:
if redact_option != "" and len(redact_option) != 0:
if "," in redact_option:
self.redact = redact_option.split(",")
else:
self.redact = [redact_option]
return True

def handle(self):
Expand Down Expand Up @@ -174,6 +184,19 @@ def handle_from_node(node):
# Persist the summary results to a file
FileUtil.write_append(os.path.join(pack_dir_this_command, "result_summary.txt"), summary_tuples)
last_info = "For result details, please run cmd \033[32m' cat {0} '\033[0m\n".format(os.path.join(pack_dir_this_command, "result_summary.txt"))
self.stdio.print(last_info)
try:
if self.redact and len(self.redact) > 0:
self.stdio.verbose("redact_option is {0}".format(self.redact))
redact_dir = "{0}_redact".format(pack_dir_this_command)
self.redact_dir = redact_dir
redact = Redact(self.context, self.pack_dir_this_command, redact_dir)
redact.redact_files(self.redact)
self.stdio.print("redact success the log save on {0}".format(self.redact_dir))
return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": redact_dir})
except Exception as e:
self.stdio.error("redact failed {0}".format(e))
return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="redact failed {0}".format(e))
return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": pack_dir_this_command})

def __handle_from_node(self, pack_dir_this_command, node):
Expand Down
17 changes: 17 additions & 0 deletions handler/gather/plugins/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*
# Copyright (c) 2022 OceanBase
# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
# http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.

"""
@time: 2024/09/18
@file: __init__.py
@desc:
"""
113 changes: 113 additions & 0 deletions handler/gather/plugins/redact.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import os
import shutil
import zipfile

from common.import_module import import_modules
import multiprocessing as mp


class Redact:
def __init__(self, context, input_file_dir, output_file_dir):
self.context = context
self.stdio = context.stdio
self.redacts = {}
self.input_file_dir = input_file_dir
self.output_file_dir = output_file_dir
self.module_dir = os.path.expanduser('~/.obdiag/gather/redact')
self.inner_config = self.context.inner_config

# init all redact
# import all redact module
self.all_redact = []
try:
self.stdio.print("Importing redact modules...")
self.all_redact = import_modules(self.module_dir, self.stdio)
self.stdio.verbose("Imported redact module {0}".format(self.all_redact))
except Exception as e:
self.stdio.error(f"Error importing redact modules: {e}")
raise e

def check_redact(self, input_redacts):
for input_redact in input_redacts:
if not input_redact in self.all_redact:
self.stdio.error("Redact {0} not found".format(input_redact))
raise Exception(f"Redact {input_redact} not found")
else:
self.stdio.verbose(f"Redact {input_redact} found")
self.redacts[input_redact] = self.all_redact[input_redact]

def redact_files(self, input_redacts):
self.stdio.verbose("redact_files start")
self.check_redact(input_redacts)
# check self.redacts
if not self.redacts or len(self.redacts) == 0:
self.stdio.error("No redact found")
return False
# create dir to save the files after redact
if not os.path.exists(self.output_file_dir):
os.makedirs(self.output_file_dir)
# use threading to redact the files
files_name = os.listdir(self.input_file_dir)
self.stdio.verbose(files_name)
# unzip the log file
for zip_file in files_name:
if ".zip" in zip_file:
self.stdio.verbose("open zip file: {0}".format(os.path.join(self.input_file_dir, zip_file)))
with zipfile.ZipFile(os.path.join(self.input_file_dir, zip_file), 'r') as zip_ref:
# Extract all files to the current directory
zip_ref.extractall(self.input_file_dir)
gather_log_files = []
for file_name in os.listdir(self.input_file_dir):
if "zip" not in file_name and "result_summary.txt" not in file_name:
log_dir = os.path.join(self.input_file_dir, file_name)
for log_file in os.listdir(log_dir):
gather_log_files.append(os.path.join(log_dir, log_file))
self.stdio.verbose("result_log_files add {0}".format(os.path.join(log_dir, log_file)))
file_queue = []
max_processes = int(self.inner_config.get('gather').get('redact_processing_num')) or 3
self.stdio.verbose("max_processes: {0}".format(max_processes))
semaphore = mp.Semaphore(max_processes)
for file_name in gather_log_files:
if "result_summary.txt" in file_name:
continue
self.stdio.verbose("inport file name: {0}".format(file_name))
self.stdio.verbose("output file name: {0}".format(file_name.replace(self.input_file_dir, self.output_file_dir)))
semaphore.acquire()
file_thread = mp.Process(target=self.redact_file, args=(file_name, file_name.replace(self.input_file_dir, self.output_file_dir), semaphore))
file_thread.start()
file_queue.append(file_thread)
for file_thread in file_queue:
file_thread.join()
# zip the dir by node
subfolders = [f for f in os.listdir(self.output_file_dir) if os.path.isdir(os.path.join(self.output_file_dir, f))]
for subfolder in subfolders:
subfolder_path = os.path.join(self.output_file_dir, subfolder)
zip_filename = os.path.join(self.output_file_dir, f"{subfolder}.zip")
with zipfile.ZipFile(zip_filename, 'w') as zipf:
for root, dirs, files in os.walk(subfolder_path):
for file in files:
file_path = os.path.join(root, file)
zipf.write(file_path, os.path.relpath(file_path, subfolder_path))
self.stdio.verbose("delete the dir: {0}".format(subfolder_path))
shutil.rmtree(subfolder_path)
self.stdio.print(f"{subfolder} is zipped on {zip_filename}")
return True

def redact_file(self, input_file, output_file, semaphore):
try:
input_file = os.path.abspath(input_file)
output_file = os.path.abspath(output_file)
dir_path = os.path.dirname(output_file)
log_content = ""
if not os.path.exists(dir_path):
os.makedirs(dir_path)
with open(input_file, 'r', encoding='utf-8', errors='ignore') as file:
log_content = file.read()
for redact in self.redacts:
log_content = self.redacts[redact].redact(log_content)
with open(output_file, 'w', encoding='utf-8', errors='ignore') as file:
file.write(log_content)
except Exception as e:
self.stdio.error(f"Error redact file {input_file}: {e}")
finally:
semaphore.release()
41 changes: 41 additions & 0 deletions handler/gather/plugins/redact/all_sql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*
# Copyright (c) 2022 OceanBase
# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
# http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.

"""
@time: 2024/09/19
@file: all_sql.py
@desc:
"""
import re


class all_sql:
def __init__(self):
pass

def redact(self, text):
patterns = [
(r'stmt:"(.*?[^\\])", stmt_len', 'stmt:"<SQL_QUERY_REDACTED>", stmt_len'),
(r'ps_sql:"(.*?[^\\])", is_expired_evicted', 'ps_sql:"<SQL_QUERY_REDACTED>", is_expired_evicted'),
(r'ps_sql:"(.*?[^\\])", ref_count:', 'ps_sql:"<SQL_QUERY_REDACTED>", ref_count:'),
(r'origin_sql=(.*?[^\\]), ps_stmt_checksum', 'origin_sql=<SQL_QUERY_REDACTED>, ps_stmt_checksum'),
(r'get_sql_stmt\(\)=(.*?[^\\]), route_sql_=', 'get_sql_stmt()=<SQL_QUERY_REDACTED>, route_sql_='),
(r'multi_stmt_item={(.*?[^\\])\}', 'multi_stmt_item={<SQL_QUERY_REDACTED>}'),
]
log_content = text
# 遍历所有模式并进行替换
for pattern, replacement in patterns:
log_content = re.sub(pattern, replacement, text, flags=re.DOTALL)
return log_content


all_sql = all_sql()
2 changes: 1 addition & 1 deletion rpm/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

python_bin='python'
W_DIR=`pwd`
VERSION=${VERSION:-'2.4.0'}
VERSION=${VERSION:-'2.5.0'}


function python_version()
Expand Down
5 changes: 4 additions & 1 deletion rpm/oceanbase-diagnostic-tool.spec
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Name: oceanbase-diagnostic-tool
Version:2.4.0
Version:2.5.0
Release: %(echo $RELEASE)%{?dist}
Summary: oceanbase diagnostic tool program
Group: Development/Tools
Expand Down Expand Up @@ -34,6 +34,7 @@ mkdir -p $BUILD_DIR/SOURCES/resources
mkdir -p $BUILD_DIR/SOURCES/check/tasks
mkdir -p $BUILD_DIR/SOURCES/gather/tasks
mkdir -p $BUILD_DIR/SOURCES/rca
mkdir -p $BUILD_DIR/SOURCES/gather/redact
mkdir -p $BUILD_DIR/SOURCES/dependencies/bin
mkdir -p ${RPM_BUILD_ROOT}/usr/bin
mkdir -p ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool
Expand All @@ -46,6 +47,7 @@ rm -f obdiag.py oceanbase-diagnostic-tool.spec
\cp -rf $SRC_DIR/handler/checker/tasks $BUILD_DIR/SOURCES/check
\cp -rf $SRC_DIR/handler/gather/tasks $BUILD_DIR/SOURCES/gather
\cp -rf $SRC_DIR/handler/rca/scene/* $BUILD_DIR/SOURCES/rca
\cp -rf $SRC_DIR/handler/gather/plugins/redact/*.py $BUILD_DIR/SOURCES/gather/redact
\cp -rf $SRC_DIR/init.sh $BUILD_DIR/SOURCES/init.sh
\cp -rf $SRC_DIR/init_obdiag_cmd.sh $BUILD_DIR/SOURCES/init_obdiag_cmd.sh
\cp -rf $SRC_DIR/conf $BUILD_DIR/SOURCES/conf
Expand All @@ -67,6 +69,7 @@ mkdir -p ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/rca
mv ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/check/tasks/*.yaml ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/check/
\cp -rf $BUILD_DIR/SOURCES/gather/tasks ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/gather
\cp -rf $BUILD_DIR/SOURCES/rca/* ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/rca
\cp -rf $BUILD_DIR/SOURCES/gather/redact ${RPM_BUILD_ROOT}/usr/local/oceanbase-diagnostic-tool/gather


%files
Expand Down

0 comments on commit 5c09dff

Please sign in to comment.