Skip to content
This repository has been archived by the owner on Mar 12, 2024. It is now read-only.

Commit

Permalink
Add some checks. (#39)
Browse files Browse the repository at this point in the history
  • Loading branch information
puppylpg authored Nov 12, 2020
1 parent 60a61f4 commit 9adf192
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 16 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,11 @@
* bugfix
- 修复 Win 下缓存文件编码的问题
- 在缓存意外不合法时现在会重新抓取

# v3.8.3(2020-11-12)
* bugfix
- sticker类别变多了,默认黑白名单屏蔽sticker的时候使用通配符;
- 增加了一些校验,防止特殊情况数据不存在时报错:
+ 写cache前校验爬取内容是否超时为None;
+ 数据表为空校验。没爬到数据就不给出建议了;
+ 获取steam历史价格返回时,增加'prices' key存在性校验;
16 changes: 8 additions & 8 deletions config/config.ini
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[BASIC]
# 网页登录buff和steam后,把浏览器的cookie贴到这里,才能运行。推荐从Chrome中复制,下面是两条示例cookie
buff_cookie = _ga=GA1.2.162602080.1551374933; _ntes_nuid=8ce0cf6bdce55512e73f49cb8a49960e; mail_psc_fingerprint=d80ec72871726e9b192181fd1a3633d6; OUTFOX_SEARCH_USER_ID_NCOO=29659292.15961449; Device-Id=33u998YqmNWbhH5GbWUo; vjuids=369cb7d82.170e16a9519.0.3eb2c52902997; vjlast=1584329824.1584329824.30; _ntes_nnid=8ce0cf6bdce55512e73f49cb8a49960e,1584329823520; vinfo_n_f_l_n3=d81bf3a25989eb31.1.4.1561837557589.1576393349946.1585037711031; NTES_CMT_USER_INFO=305053074%7C%E6%9C%89%E6%80%81%E5%BA%A6%E7%BD%91%E5%8F%8B0ibHSi%7Chttp%3A%2F%2Fcms-bucket.nosdn.127.net%2F2018%2F08%2F13%2F078ea9f65d954410b62a52ac773875a1.jpeg%7Cfalse%7CeWQuNzU3YTdkZjAwZWNiNDJlOGJAMTYzLmNvbQ%3D%3D; nts_mail_user=shining63.com:-1:1; Locale-Supported=zh-Hans; game=csgo; _gid=GA1.2.408819780.1602387923; NTES_YD_SESS=2ad4oMUZrpNSwralkVQFGR_rtV_TZgYPUG_YgOJvmm9n.6Js.4TFkURWXExDgqyQtJ6h2Xb5ffQVQtqcw3wjTyevM11skcblbUdx4.PVDePscTo5TxGZkWFOFlDL1QAZtPU9d_0I0UegOlOjKEeBL8Eh8RAUnitaCpIfkdkkdS1pjSmSo4ivvdcaUmWcEdxlgDOgqYTMRsxdkXcseYgy0YQ4b8UJhNoWKeWEG_FUnB; S_INFO=1602387932|0|3&80##|; P_INFO=|1602387932|1|netease_buff|00&99|bej&1599745561&netease_buff#bej&null#10#0#0|&0|null|; remember_me=U1094050600|UrFjxmWDgUX4vfX9pphq8i2cTtkEz0d2; session=1-eV6CEf3tgQ9qCDS-6e2xQlEjJXSVuGqp3JNAfLwBVSGe2046524528; csrf_token=ImI2NTcyNThkYmU2Mzg2MjIxMzBhMTE3NzgyYWJiMGM5YzEwYWY5NmEi.EmQUig.oRWIRxnDlzWlMYX4ps9paRW2eWo
steam_cookie = steamMachineAuth76561198251761676=B89D7B0897180E54C9F2E93F8AAFA4583CAADE7D; timezoneOffset=28800,0; _ga=GA1.2.1902489943.1551205764; steamMachineAuth76561198874249759=E46DCE6095514E3D489CAF1E7CBC3F9F8CD3ACC6; browserid=1066728544083117486; recentlyVisitedAppHubs=271590%2C80%2C730; sessionid=afd31a4ca7e900c9125edf89; steamCountry=US%7C630911856130eec5e4910b6ffb8dbdde; _gid=GA1.2.2064465479.1602388630; steamLoginSecure=76561198251761676%7C%7CF29B04B445D4E8B30D2AD04AA974588C1FAB7EDE; steamRememberLogin=76561198251761676%7C%7Ca5e43585d1cd13db87c3d856d7676178; webTradeEligibility=%7B%22allowed%22%3A1%2C%22allowed_at_time%22%3A0%2C%22steamguard_required_days%22%3A15%2C%22new_device_cooldown_days%22%3A7%2C%22time_checked%22%3A16388657%7D; strInventoryLastContext=730_2; tsTradeOffersLastRead=1662594
buff_cookie = _ga=GA1.2.162602080.1551374933; _ntes_nuid=8ce0cf6bdce55512e73f49cb8a49960e; mail_psc_fingerprint=d80ec72871726e9b192181fd1a3633d6; OUTFOX_SEARCH_USER_ID_NCOO=29659292.15961449; Device-Id=33u998YqmNWbhH5GbWUo; vjuids=369cb7d82.170e16a9519.0.3eb2c52902997; vjlast=1584329824.1584329824.30; _ntes_nnid=8ce0cf6bdce55512e73f49cb8a49960e,1584329823520; vinfo_n_f_l_n3=d81bf3a25989eb31.1.4.1561837557589.1576393349946.1585037711031; NTES_CMT_USER_INFO=305053074%7C%E6%9C%89%E6%80%81%E5%BA%A6%E7%BD%91%E5%8F%8B0ibHSi%7Chttp%3A%2F%2Fcms-bucket.nosdn.127.net%2F2018%2F08%2F13%2F078ea9f65d954410b62a52ac773875a1.jpeg%7Cfalse%7CeWQuNzU3YTdkZjAwZWNiNDJlOGJAMTYzLmNvbQ%3D%3D; [email protected]:-1:1; __oc_uuid=ed078220-12cd-11eb-8ff5-199a4d2b4ac4; Locale-Supported=zh-Hans; game=csgo; _gid=GA1.2.648285736.1605190175; _gat_gtag_UA_109989484_1=1; NTES_YD_SESS=SN.CH9UV_zHPlqCiCLvgBOoLTrvc2fBGRyieqBhbqAP1HglxHydKU4DQmq5B7At06lgkSmM_II0j06AJnuMWYnpdtYe8PPxUJMsM4X5yH3jBY3xJdC_d59nM8A1bksgKL51SSXhh3Rbd4SeDy6ZIwse2MUjzElPeLdPKBaMoZafPdtNUF9E67TduT0krt3r6_s46hz3dnGE.y20NruVavQP3kETqQCAqK6iZ3b0Nc6tJw; S_INFO=1605190207|0|3&80##|2051; P_INFO=2051|1605190207|1|netease_buff|00&99|bej&1602387932&netease_buff#bej&null#10#0#0|&0|null|2051; remember_me=U1094050600|T3zeeLJIc6y9kVtTTAGV0mdqvIXDpeX0; session=1-WfP1TH9yGjtZniGRmbfFSezTOMS-ZeYguhJFzDIT5Fem2046524528; csrf_token=ImFjMWE4YTc4MDFkMTAyZjYyYWZhZWVhYzllZGFlNTJiZjc1NWE1MDEi.Eo7TwA.f39WinRhrzJgSTG4as2EjhD6za0
steam_cookie = ActListPageSize=100; steamMachineAuth76561198251761676=B89D7B0897180E54C9F2E93F8AAFA4583CAADE7D; timezoneOffset=28800,0; _ga=GA1.2.1902489943.1551205764; steamMachineAuth76561198874249759=E46DCE6095514E3D489CAF1E7CBC3F9F8CD3ACC6; browserid=1066728544083117486; recentlyVisitedAppHubs=271590%2C80%2C730; Steam_Language=english; steamCountry=US%7C4705a9aaf22f908f9e4452081abd865a; sessionid=56b51232f9f3936a0ebbf88d; _gid=GA1.2.1847664544.1605190173; steamLoginSecure=76561198251761676%7C%7CE4B6E3BBDD5AF069692D8C8A56755ECBB34ECC68; steamRememberLogin=76561198251761676%7C%7Ca5e43585d1cd13db87c3d856d7676178; webTradeEligibility=%7B%22allowed%22%3A1%2C%22allowed_at_time%22%3A0%2C%22steamguard_required_days%22%3A15%2C%22new_device_cooldown_days%22%3A7%2C%22time_checked%22%3A1605190194%7D
# 提供一个代理来访问Steam社区市场
proxy = socks5://127.0.0.1:10808

Expand All @@ -18,7 +18,7 @@ url_cache_hour = 6
# 无视缓存爬取数据
force_crawl = False
# 一次请求的超时重试次数
retry_times = 4
retry_times = 3

# 基本参数设置
[COMMON]
Expand All @@ -28,18 +28,18 @@ steam_sell_tax = 0.15
# 过滤行为
[FILTER]
# 爬取物品的最低价格,价格过低则不考虑
crawl_min_price_item = 100
crawl_min_price_item = 150
# 爬取物品的最高价格
crawl_max_price_item = 200
crawl_max_price_item = 160
# 7天交易历史,少于该阈值则认为是冷门物品,不考虑
min_sold_threshold = 70
# 爬取类别白名单,如只想爬取AK和M4(A1 & A4),则设置为:["weapon_ak47", "weapon_m4a1", "weapon_m4a1_silencer"]
# 具体类别参考`config/reference/category.md`,详见README
# 黑白名单均支持通配符匹配,如 weapon_knife* 等,更多用法请搜索 "Shell 通配符"
# 黑白名单均支持通配符匹配,如'weapon_knife*'等,更多用法请搜索 "Shell 通配符"
category_white_list = []
# 爬取类别黑名单。如果黑名单白名单同时存在,白名单优先级更高
# 默认的黑名单加了以下内容,排除掉乱七八糟的武器箱音乐盒印花探员之类的
category_black_list = ["sticker", "csgo_type_tool", "csgo_type_spray", "csgo_type_collectible", "csgo_type_ticket", "csgo_tool_gifttag", "csgo_type_musickit", "csgo_type_weaponcase", "csgo_tool_weaponcase_keytag", "type_customplayer", "csgo_tool_patch"]
# 默认的黑名单加了以下内容,排除掉乱七八糟的武器箱音乐盒印花探员之类的,刀也排除掉了(不会真有人steam里卖刀吧:D)
category_black_list = ["*sticker*", "*knife*", "csgo_type_tool", "csgo_type_spray", "csgo_type_collectible", "csgo_type_ticket", "csgo_tool_gifttag", "csgo_type_musickit", "csgo_type_weaponcase", "csgo_tool_weaponcase_keytag", "type_customplayer", "csgo_tool_patch"]

# 结果设置
[RESULT]
Expand Down
3 changes: 2 additions & 1 deletion src/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@

table = item_crawler.crawl()

if table is not None:
# table may be empty if no data is received due to timeout
if (table is not None) and (not table.empty):
# suggestion
suggestion.suggest(table)
else:
Expand Down
5 changes: 3 additions & 2 deletions src/crawl/history_price_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from datetime import datetime

from src.config.urls import steam_price_history_url
from src.util.requester import get_json_dict, steam_cookies
from src.util.logger import log
from src.util.requester import get_json_dict, steam_cookies


def crawl_item_history_price(index, item, total_price_number):
Expand All @@ -13,7 +13,8 @@ def crawl_item_history_price(index, item, total_price_number):
log.info('GET steam history price {}/{} for ({}): {}'.format(index, total_price_number, item.name, steam_price_url))
steam_history_prices = get_json_dict(steam_price_url, steam_cookies, True)

if steam_history_prices is not None:
# the response may lack the 'prices' key, so verify it exists before indexing
if (steam_history_prices is not None) and ('prices' in steam_history_prices):
raw_price_history = steam_history_prices['prices']
if len(raw_price_history) > 0:
days = min((datetime.today().date() - datetime.strptime(raw_price_history[0][0], '%b %d %Y %H: +0').date()).days, 7)
Expand Down
5 changes: 4 additions & 1 deletion src/crawl/item_crawler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
# from tqdm import tqdm

from src.config.definitions import CRAWL_MIN_PRICE_ITEM, CRAWL_MAX_PRICE_ITEM, BUFF_COOKIE, FORCE_CRAWL
from src.config.urls import goods_section_root_url, goods_root_url, goods_section_page_url
Expand Down Expand Up @@ -67,8 +68,10 @@ def crawl_website():

# crawl by categories and price section
if len(raw_categories) != len(categories):
for category in categories:
total_category = len(categories)
for index, category in enumerate(categories, start=1):
csgo_items.extend(crawl_goods_by_price_section(category))
log.info('GET category {}/{} for ({}).'.format(index, total_category, category))
else:
# crawl by price section without category
csgo_items.extend(crawl_goods_by_price_section(None))
Expand Down
4 changes: 2 additions & 2 deletions src/util/persist_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ def tabulate(csgo_items):


def table_info(table):
log.info(table)
log.info(table.describe())
# log.info(table)
log.info('Total Items Summary:\n{}'.format(table.describe()))
log.info('\n')
9 changes: 7 additions & 2 deletions src/util/requester.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,10 @@ def get_json_dict(url, cookies, proxy = False, times = 1):
if exist(url):
return json.loads(fetch(url))
json_data = get_json_dict_raw(url, cookies, proxy, times)
store(url,json_data)
return json.loads(json_data)

if json_data is None:
return None
else:
# cannot store None: only cache data that was actually received
store(url, json_data)
return json.loads(json_data)

0 comments on commit 9adf192

Please sign in to comment.