-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsqlback.py
161 lines (143 loc) · 6.45 KB
/
sqlback.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import re
import time

from apscheduler.schedulers.background import BackgroundScheduler
from sqlalchemy import create_engine
from sqlalchemy import or_
from sqlalchemy import text
from sqlalchemy.orm import sessionmaker

from utils.models import MeiTuanShop, JingDong, EnterpriseCq, GoverNews, WaiMai, XieCheng, DZDianPingCQ, TuNiu, ShunQi, \
    WGQY, TuNiuAll, BFZY, DZDianPing, BFZYCQ, SouLeWang, QYLu, HuangYe, CnTrade, MetalInc, TaoJin
from utils.sqlbackends import session_scope, session_scope_remote
# Names of the model classes that table_back() copies in full.
datatable = ["MeiTuanShop", "JingDong", "EnterpriseCq", "GoverNews"]
# Engine for the remote MySQL database "crawls" (mysqlconnector driver, utf8).
# NOTE(review): the user/password/host part of the URL is hard-coded here and
# looks like it was replaced by an e-mail-obfuscation placeholder in this
# copy — confirm the real value and consider loading it from configuration.
mysql_client_remote = create_engine(
"mysql+mysqlconnector://root:[email protected]:3306/crawls?charset=utf8",
encoding="utf-8",
)
# Session factory bound to the remote engine; call it to open a new session.
session_sql_remote = sessionmaker(bind=mysql_client_remote)
def table_back():
    """Copy every row of the tables named in ``datatable`` to the remote DB.

    Each model class is looked up by name in this module's globals, all of
    its rows are read through ``session_scope()`` and re-inserted through a
    fresh remote session with ``id`` cleared, so the remote side assigns its
    own primary keys.  Inserts are committed in batches of 5000.
    """
    models = globals()
    for model_name in datatable:
        table = models.get(model_name)
        session_remote = session_sql_remote()
        try:
            with session_scope() as sess1:
                rows = sess1.query(table).all()
                for count, item in enumerate(rows, start=1):
                    print(item.__dict__)
                    # Build the constructor kwargs from a copy: editing
                    # item.__dict__ directly would strip the ORM bookkeeping
                    # entry from the live, session-attached instance.
                    temp = {
                        key: value
                        for key, value in item.__dict__.items()
                        if key != "_sa_instance_state"
                    }
                    temp["id"] = None
                    session_remote.add(table(**temp))
                    if count % 5000 == 0:
                        session_remote.commit()
                # Flush the last, partially filled batch.
                session_remote.commit()
        finally:
            # One remote session is opened per table; always release it.
            session_remote.close()
# Lookup queries keyed by model class name.  Each value is a
# "select id ..." template with quoted '{attribute}' placeholders; it is
# used by zengliang_back() to find the local row(s) matching a given remote
# row.  The keys are also the list of tables that zengliang_back() and
# total_count() iterate over.
query_map = {"MeiTuanShop": "select id from meiTuanShop where shop='{shop}' and phone='{phone}' and url='{url}'",
"JingDong": "select id from jingDong where productName='{productName}' and productUrl='{productUrl}' and price='{price}'",
"EnterpriseCq": "select id from enterprise where enterpriseName='{enterpriseName}' and address='{address}' and socialCreditCode='{socialCreditCode}'",
"GoverNews": "select id from govermentnews where title='{title}' and publishDate='{publishDate}' and url='{url}'",
"WaiMai": "select id from meituanwaimai where url='{url}'",
"XieCheng": "select id from xiechenghotel where url = '{url}'",
"DZDianPing": "select id from dianping where url = '{url}'",
"TuNiu": "select id from tuniu where url = '{url}'",
"ShunQi": "select id from shunqi where url = '{url}'",
"WGQY": "select id from wanguoqiye where url = '{url}'",
"TuNiuAll": "select id from tuniuquanguo where url = '{url}'",
"BFZY": "select id from bafangziyuan where url = '{url}'",
"BFZYCQ": "select id from bafangziyuanchongqing where url = '{url}'",
"DZDianPingCQ": "select id from dazhongdianping where url = '{url}'",
"SouLeWang": "select id from 51sole where url = '{url}'",
"QYLu": "select id from qiyelu where url = '{url}'",
"HuangYe": "select id from huangye88 where url = '{url}'",
"CnTrade": "select id from cntrade where url = '{url}'",
"MetalInc": "select id from metalinc where url = '{url}'",
"TaoJin": "select id from taojindi where url = '{url}'",
}
# Matches one quoted str.format placeholder such as '{url}'.
_QUOTED_PLACEHOLDER = re.compile(r"'\{(\w+)\}'")


def _bind_query(template):
    """Turn a ``'{name}'`` format template into bound-parameter SQL.

    Returns ``(sql, names)``: *sql* is *template* with every quoted
    ``'{name}'`` placeholder replaced by ``:name``, and *names* lists the
    placeholder names in order of appearance.
    """
    names = _QUOTED_PLACEHOLDER.findall(template)
    return _QUOTED_PLACEHOLDER.sub(r":\1", template), names


def zengliang_back():
    """Incrementally copy new local rows of every ``query_map`` table.

    For each table the remote row with the highest ``id`` is looked up in
    the local database using that table's ``query_map`` query.  Every local
    row whose ``id`` is greater than the (largest) match is then inserted
    remotely with ``id`` cleared.  If the remote table is empty, all local
    rows with ``id > 0`` are copied; if the newest remote row has no local
    match, the table is skipped.  Inserts are committed in batches of 1000.
    """
    atts = globals()
    session_remote = session_sql_remote()
    try:
        for item1, template in query_map.items():
            table = atts.get(item1)
            newest_remote = (
                session_remote.query(table).order_by(table.id.desc()).first()
            )
            with session_scope() as sess1:
                if newest_remote is None:
                    dd = [0]
                else:
                    # Pass the row values as bound parameters instead of
                    # formatting them into the SQL text: crawled strings may
                    # contain quotes that would break or alter the statement.
                    sql, names = _bind_query(template)
                    params = {
                        name: getattr(newest_remote, name) for name in names
                    }
                    matches = sess1.execute(text(sql), params).fetchall()
                    dd = [row[0] for row in matches]
                if not dd:
                    continue
                id_new = max(dd)
                if len(dd) >= 2:
                    # More than one local row matched the remote row.
                    print("youchongfu {} {}".format(item1, dd))
                new_rows = sess1.query(table).filter(table.id > id_new).all()
                for count, item in enumerate(new_rows, start=1):
                    print(item.__dict__)
                    # Work on a copy so the live ORM instance keeps its
                    # bookkeeping entry and its real id.
                    temp = {
                        key: value
                        for key, value in item.__dict__.items()
                        if key != "_sa_instance_state"
                    }
                    temp["id"] = None
                    scope = temp.get("businessScope")
                    # Values longer than 666 characters are cut to 600 —
                    # presumably to fit the remote column; TODO confirm.
                    if scope and len(scope) > 666:
                        temp["businessScope"] = scope[:600]
                    session_remote.add(table(**temp))
                    if count % 1000 == 0:
                        session_remote.commit()
                # Flush the last, partially filled batch for this table.
                session_remote.commit()
    finally:
        # Release the remote session even when a table fails midway.
        session_remote.close()
def total_count():
    """Print the remote row count of every ``query_map`` table plus a total.

    The printed dict maps each model name to its remote row count and has an
    extra ``"total"`` entry holding the sum of all counts.
    """
    atts = globals()
    session_remote = session_sql_remote()
    res = {}
    try:
        for item1 in query_map:
            res[item1] = session_remote.query(atts.get(item1)).count()
    finally:
        # The session was previously left open; release it in every case.
        session_remote.close()
    res["total"] = sum(res.values())
    print(res)
def get_attr_for_check(table):
    """Return the public attribute names of *table* that are used for checks.

    Names are taken from ``dir(table)`` (so they come back sorted) and are
    dropped when they start with an underscore, are exactly ``"id"``, or
    contain ``"date"`` or ``"type"`` in any letter case.

    Args:
        table: the class (or object) whose attributes are inspected.

    Returns:
        A list of the remaining attribute names.
    """
    # Inspect the argument itself; the parameter used to be ignored in
    # favour of a hard-coded model class.
    return [
        name
        for name in dir(table)
        if not name.startswith("_")
        and name != "id"
        and "date" not in name.lower()
        and "type" not in name.lower()
    ]
def fix_funds():
    """Fill missing ``BFZY.registeredFunds`` values from the ``about`` text.

    ``about`` is split on ``;`` into entries and each entry on ``:``; when an
    entry's first field is ``注册资金`` its second field is stored as
    ``registeredFunds``.  Only rows whose ``registeredFunds`` is NULL are
    touched.  Nothing is committed here explicitly — presumably
    ``session_scope()`` commits on exit; verify against its definition.
    """
    with session_scope() as sess:
        # "== None" is deliberate: SQLAlchemy renders it as IS NULL.
        na = sess.query(BFZY).filter(BFZY.registeredFunds == None).all()  # noqa: E711
        for item1 in na:
            if not item1.about:
                continue
            for entry in item1.about.split(";"):
                fields = entry.split(":")
                # Require a value field so a bare label without ":" is
                # skipped instead of raising IndexError.
                if fields[0] == "注册资金" and len(fields) > 1:
                    item1.registeredFunds = fields[1]
def tuniutongbu():
    """Copy ``phone``/``district`` of local ``TuNiuAll`` rows to the remote DB.

    Every local row that has a phone or a district is matched to a remote
    row by ``url``.  The remote row is updated, and the change committed,
    only when it currently has neither a phone nor a district.  Local rows
    with no remote counterpart are skipped.
    """
    with session_scope() as sess, session_scope_remote() as sess_remote:
        # "!= None" is deliberate: SQLAlchemy renders it as IS NOT NULL.
        na = sess.query(TuNiuAll).filter(
            or_(TuNiuAll.phone != None, TuNiuAll.district != None)  # noqa: E711
        ).all()
        for item in na:
            ture = sess_remote.query(TuNiuAll).filter(TuNiuAll.url == item.url).first()
            if ture is None:
                # No remote row for this URL; nothing to update.
                continue
            if not ture.phone and not ture.district:
                ture.phone = item.phone
                ture.district = item.district
                sess_remote.commit()
if __name__ == "__main__":
    # One-shot run: the incremental backup first, then the TuNiu sync.
    for job in (zengliang_back, tuniutongbu):
        job()
    # total_count()
    # Alternative mode (disabled): rerun the incremental backup every six
    # hours from a background scheduler and keep the process alive.
    # scheduler = BackgroundScheduler()
    # scheduler.add_job(zengliang_back, 'interval', hours=6)
    # scheduler.start()
    # try:
    #     while True:
    #         time.sleep(10)
    # except (KeyboardInterrupt, SystemExit):
    #     scheduler.shutdown()