Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CDH6.3.2版本WeDataSphere容器化 #40

Open
wants to merge 13 commits into
base: wds-cdh-6.3.2
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions dockers/cdh6.3.2/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
FROM dss_linkis:v1

# MAINTAINER has been deprecated since Docker 1.13 — use a LABEL instead.
LABEL maintainer="Zsy <[email protected]>"

# Copy the runtime scripts/config into the image BEFORE installing the Python
# requirements they ship: the original ran pip against
# /wedatasphere/docker/conf/requirements.txt before the COPY that provides it
# (it could only work if the base image already contained that file — TODO confirm).
COPY docker /wedatasphere/docker/
COPY sbin /wedatasphere/sbin/

RUN pip3 install --no-cache-dir -r /wedatasphere/docker/conf/requirements.txt -i \
    http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com

# Hand ownership to the hdfs service user and make the entrypoint executable.
RUN chown -R hdfs:hdfs /wedatasphere && chmod +x /wedatasphere/docker/script/run.sh

WORKDIR /wedatasphere

CMD ["/bin/bash", "/wedatasphere/docker/script/run.sh"]
23 changes: 23 additions & 0 deletions dockers/cdh6.3.2/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
WeDataSphere

需求文档:

https://github.com/WeBankFinTech/WeDataSphere/issues/39

使用流程:

1、安装部署:

参考安装部署文档即可快速安装使用WeDataSphere
[安装部署文档](https://github.com/MrZsy/WeDataSphere/blob/master/docs/dockers/cdh6.3.2/%E5%AE%89%E8%A3%85%E9%83%A8%E7%BD%B2%E6%96%87%E6%A1%A3.md)

2、组件升级参考:
有需要组件升级及使用其他版本组件的开发同学可以参考[升级指南](https://github.com/MrZsy/WeDataSphere/blob/master/docs/dockers/cdh6.3.2/%E5%8D%87%E7%BA%A7%E6%8C%87%E5%8D%97.md) 快速更改版本。


3、常见问题参考:
[常见问题](https://github.com/MrZsy/WeDataSphere/blob/master/docs/dockers/cdh6.3.2/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98.md)


4、其它参考官方文档:
[开发文档](https://github.com/WeBankFinTech/DataSphereStudio-Doc/tree/main/zh_CN/%E5%BC%80%E5%8F%91%E6%96%87%E6%A1%A3)
5 changes: 5 additions & 0 deletions dockers/cdh6.3.2/docker/conf/conf.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[hiveMetaData]
hive.meta.host: 172.16.13.224
hive.meta.db: metastore
hive.meta.user: hive
hive.meta.password: 123456789
3 changes: 3 additions & 0 deletions dockers/cdh6.3.2/docker/conf/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
PyMySQL==1.0.2
PyYAML==6.0
configparser
125 changes: 125 additions & 0 deletions dockers/cdh6.3.2/docker/script/db_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# !/usr/bin/python3
# -*- coding: utf-8 -*-
"""
@IDE : PyCharm
@File : db_update.py
@Time : 2023-03-07 9:51
@Place : BeiJing
@Author : Zsy
@Version : 1.0
"""
import os
import pymysql
import parse_yarn_ip

# Tables in the dss_server database whose rows embed the stale build-time
# container IP (and, for the last entry, the YARN RM URL) and therefore
# need rewriting each time the container starts with a new address.
table_list = [
    "dss_appconn_instance",
    "dss_workspace_dictionary",
    "dss_workspace_menu_appconn",
    "linkis_ps_bml_resources_task",
    "linkis_ps_bml_resources_version",
    "linkis_ps_dm_datasource_env",
    "linkis_cg_rm_external_resource_provider"
]


def get_host_ip():
    """Return the IP address the services should be reachable at.

    Prefers the ``HOST_IP`` environment variable (the rest of this script
    already reads it, and the original left a commented-out hint to do so)
    and falls back to resolving the container hostname.

    :return: IP address string.
    """
    import socket
    env_ip = os.getenv("HOST_IP")
    if env_ip:
        return env_ip
    # Fallback: resolve our own hostname. NOTE(review): depending on
    # /etc/hosts this can yield a loopback address — verify in the target env.
    hostname = socket.gethostname()
    return socket.gethostbyname(hostname)


def conn_mysql():
    """Open a connection to the local dss_server MySQL database.

    Connection settings default to the values baked into the image but can
    be overridden through DSS_DB_* environment variables, so the image can
    be pointed at another database without rebuilding.

    :return: a pymysql connection producing dict rows (DictCursor).
    """
    # NOTE(review): plaintext default credentials — prefer injecting
    # DSS_DB_PASSWORD via the container environment in real deployments.
    conn = pymysql.connect(
        host=os.getenv("DSS_DB_HOST", "127.0.0.1"),
        port=int(os.getenv("DSS_DB_PORT", "3306")),
        user=os.getenv("DSS_DB_USER", "dss_server"),
        password=os.getenv("DSS_DB_PASSWORD", "dssServer123."),
        db=os.getenv("DSS_DB_NAME", "dss_server"),
        cursorclass=pymysql.cursors.DictCursor,
        charset='utf8',
    )
    return conn


def update_ip(ip):
    """Rewrite the stale build-time IP stored in the dss_server database.

    Every table in ``table_list`` has columns that embed the IP of the
    container the image was built from (``172.17.0.5``); replace it with
    the current host IP so URLs and client addresses stay reachable.

    :param ip: the current host IP that should replace the stale one.
    """
    old_ip = "172.17.0.5"
    old_yarn_url = "http://172.16.13.131:8088"
    db_conn = conn_mysql()
    try:
        cursor = db_conn.cursor()

        def _replace_in_columns(table, columns, old, new):
            # Generic per-row substitution: rewrite `old` -> `new` in the
            # given columns. Values are bound as parameters — the original
            # interpolated row values into f-string SQL, which breaks (and
            # is injectable) as soon as a value contains a quote.
            cursor.execute(f"select * from dss_server.{table};")
            for row in cursor.fetchall():
                for column in columns:
                    value = row.get(column)
                    if value and old in value:
                        cursor.execute(
                            f"update dss_server.{table} set {column} = %s where id = %s;",
                            (value.replace(old, new), row.get("id")),
                        )
                        db_conn.commit()

        for table in table_list:
            print("更新数据库表: {}".format(table))
            if table == "dss_appconn_instance":
                # NOTE(review): the original used os.getenv("HOST_IP") here
                # (TypeError in .replace() when unset) while every other
                # branch used the `ip` parameter — use `ip` consistently.
                _replace_in_columns(table, ["url", "homepage_uri"], old_ip, ip)
            elif table == "dss_workspace_dictionary":
                _replace_in_columns(table, ["url"], old_ip, ip)
            elif table == "dss_workspace_menu_appconn":
                _replace_in_columns(table, ["manual_button_url"], old_ip, ip)
            elif table in ("linkis_ps_bml_resources_task",
                           "linkis_ps_bml_resources_version"):
                # These tables are rewritten wholesale: every row's
                # client_ip becomes the current host IP.
                cursor.execute(
                    f"update dss_server.{table} set client_ip = %s;", (ip,)
                )
                db_conn.commit()
            elif table == "linkis_ps_dm_datasource_env":
                _replace_in_columns(table, ["parameter"], old_ip, ip)
            elif table == "linkis_cg_rm_external_resource_provider":
                # The YARN RM web address comes from yarn-site.xml,
                # not from the host IP.
                yarn_url = parse_yarn_ip.run()
                _replace_in_columns(table, ["config"], old_yarn_url, yarn_url)
    finally:
        # The original leaked the connection; always release it.
        db_conn.close()


def run():
    """Entry point: detect the host IP and push it into the database."""
    host_ip = get_host_ip()
    update_ip(host_ip)
    print("数据库更新成功")


if __name__ == "__main__":
run()
67 changes: 67 additions & 0 deletions dockers/cdh6.3.2/docker/script/parse_yarn_ip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# !/usr/bin/python3
# -*- coding: utf-8 -*-
"""
@IDE : PyCharm
@File : parse_yarn_ip.py
@Time : 2023-03-06 15:43
@Place : BeiJing
@Author : Zsy
@Version : 1.0
"""

from xml.etree.ElementTree import ElementTree


def read_xml(in_path):
    """Parse the XML file at *in_path*.

    :param in_path: path to the XML file.
    :return: the populated ElementTree instance.
    """
    # ElementTree's constructor parses the file directly when given `file=`.
    return ElementTree(file=in_path)


def find_nodes(tree, path):
    """Return every node under *tree* matching *path*.

    :param tree: a parsed XML tree (or element) exposing ``findall``.
    :param path: node path to match, e.g. ``"property"``.
    :return: list of matching elements.
    """
    matches = tree.findall(path)
    return matches


def get_yarn_ips(nodelist, kv_map):
    """Collect ResourceManager web-app addresses from yarn-site properties.

    Looks for the HA rm-ids property named in *kv_map*; when present, the
    per-RM ``yarn.resourcemanager.webapp.address.<id>`` keys are collected,
    otherwise the single non-HA ``yarn.resourcemanager.webapp.address``.

    :param nodelist: ``<property>`` elements with name/value children.
    :param kv_map: map whose ``"name"`` entry holds the rm-ids property name.
    :return: list of web-app address strings, in nodelist order.
    """
    ha_key = kv_map.get("name")
    # Expand the comma-separated rm-ids into the per-RM address keys.
    wanted_keys = [
        "yarn.resourcemanager.webapp.address." + rm_id
        for node in nodelist
        if node.find('name').text == ha_key
        for rm_id in node.find('value').text.split(",")
    ]
    if not wanted_keys:
        # Non-HA cluster: a single webapp address property.
        wanted_keys = ["yarn.resourcemanager.webapp.address"]
    return [
        node.find('value').text
        for node in nodelist
        if node.find('name').text in wanted_keys
    ]


def run(yarn_site_path="/etc/hadoop/conf/yarn-site.xml"):
    """Build the ';'-joined YARN ResourceManager web URL list.

    :param yarn_site_path: location of yarn-site.xml (defaults to the CDH
        client-config path inside the container; parameterized so other
        layouts can reuse this).
    :return: ``http://host:port`` URLs joined with ``;``.
    """
    tree = read_xml(yarn_site_path)
    nodes = find_nodes(tree, "property")
    ips = get_yarn_ips(nodes, {"name": "yarn.resourcemanager.ha.rm-ids"})
    # Bug fix: the original's `if "http://" not in ip` filter silently
    # DROPPED any address that already carried the scheme; keep those
    # as-is and prefix only the bare host:port entries.
    urls = [ip if ip.startswith("http://") else "http://" + ip for ip in ips]
    return ";".join(urls)


if __name__ == "__main__":
run()
16 changes: 16 additions & 0 deletions dockers/cdh6.3.2/docker/script/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
echo "启动mysql"
/etc/init.d/mysqld start --user=mysql &
sleep 3
echo "启动nginx"
/usr/sbin/nginx -c /etc/nginx/nginx.conf
echo "配置hdfs路径"
source /etc/profile
echo "${CDH_HOME}"
sh ${CDH_HOME}/bin/hdfs dfs -mkdir -p /tmp/test/linkis
sh ${CDH_HOME}/bin/hdfs dfs -chmod 775 /tmp/test/linkis
echo "初始化配置文件"
python3 /wedatasphere/docker/script/db_update.py
python3 /wedatasphere/docker/script/update_config.py
/usr/sbin/nginx -s reload
echo "启动服务"
su - hdfs -s /bin/bash /wedatasphere/sbin/start-all.sh
Loading