diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b6e4761
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,129 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/LICENSE b/LICENSE
new file mode 100755
index 0000000..5dc609b
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Lewis Tian
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100755
index 0000000..33ba210
--- /dev/null
+++ b/README.md
@@ -0,0 +1,231 @@
+# 我到底在华科吃了些啥 (华科校园卡年度报告)
+
+
+
+## Table of Contents
+
+- [起因](#起因)
+- [数据获取](#数据获取)
+- [数据分析](#数据分析)
+ - [2019 数据详情](#2019-数据详情)
+ - [2020 数据详情](#2020-数据详情)
+- [如何使用](#如何使用)
+
+## 起因
+
+今天(2019-12-23)在东一一楼烧腊窗口恰饭,我点的是鸡排饭,然后我没说要啥汁,那个姐姐(感觉叫阿姨不太对)直接说:番茄是吧。我:?,我挺疑惑的,然后在她把饭递给我的时候问了句:你咋知道的?她:因为你经常吃啊(笑)。
+
+想了下好像也没有经常吃吧,虽然之前高中也有过类似的经历,那是因为吃拉面不要香菜、热干面不要葱、炒面炒粉不要葱,然后吃了一段时间就被老板记住了。
+
+很有趣的一次是吃热干面,老板没抬头看,就调好葱姜蒜芝麻酱准备递给我,然后看到我,说:哎呀,你不要葱,这碗给后面的同学,重新给你下一碗 2333。扯远了,因为挺好奇的,于是就打算把今年这一年的吃饭记录都爬下来康康,我这一年都吃的是啥?
+
+## 数据获取
+
+从 [校园卡服务平台](http://ecard.hust.edu.cn/Default.aspx) 可以查到流水,开始我以为所谓的“导出所有”是把所有记录导出,结果跟“导出当前页流水”是一样的。
+
+所以直接用 Python 写了个爬虫直接把今年(2019)所有月份数据都爬下来,去掉多余的信息,仅保存 *时间*、*价格*、*食堂* 这三项数据,然后存为 CSV 文件。
+
+- 过程:
+
+
+
+- 结果:
+
+
+
+## 数据分析
+
+所以这一年到底吃了啥?下面就见分晓!
+
+首先将这些 csv 文件合并,使用 `pandas` 库很简单,下面就是:
+
+```python
+def merge_all_files():
+ files = glob.glob("csv/*.csv")
+ df = pd.concat([pd.read_csv(file) for file in files])
+ df.to_csv("csv/2019.csv", index=False, encoding='utf-8-sig')
+```
+
+
+ 2019
+
+### 2019 数据详情
+
+截至今天,每个月使用一卡通的消费次数(包括超市和自助售货机):
+
+```
+{
+ 1: 95,
+ 2: 50,
+ 3: 142,
+ 4: 121,
+ 5: 131,
+ 6: 111,
+ 7: 104,
+ 8: 138,
+ 9: 150,
+ 10: 102,
+ 11: 125,
+ 12: 104
+}
+```
+
+
+
+
+
+
+各个食堂窗口的食用情况(指的是刷卡次数,吃个晚饭可能会刷几次卡)TOP10
+
+```bash
+集贤楼食堂红案: 249
+集贤楼蒸菜净荤组: 123
+东一二楼特色菜品: 108
+集贤楼食堂商店: 99
+东一二楼华科速7: 84
+东一一楼蒸点稀食: 81
+东一二楼湘味小钵: 70
+东一一楼烧腊饭: 68
+东一二楼大众菜(一): 66
+集贤楼食堂煎烙: 65
+```
+
+
+
+
+
+
+各个食堂的食用情况
+
+```
+{
+ '东一': 581,
+ '集贤楼': 575,
+ '自助售货机': 9,
+ '西一': 139,
+ '百品屋': 8,
+ '集锦园': 4,
+ '校园网': 6,
+ '图书馆': 1,
+ '东学超市': 3,
+ '东三': 27,
+ '紫荆园': 4,
+ '百景': 12,
+ '喻园': 4
+}
+```
+
+
+
+
+
+
+
+
+
+ 2020
+
+### 2020 数据详情
+
+上面过程和分析都是 19 年写的,偶然翻代码翻到这个东西,于是今年(2021)更新了下。
+
+1、各个食堂的食用情况
+
+| 食堂 | 消费次数 |
+|:----------:|:-------:|
+| 东一 | 587 |
+| 自助售货机 | 58 |
+| 东学超市 | 9 |
+| 集贤楼 | 6 |
+| 校医院 | 2 |
+| 集锦园 | 4 |
+| 东三 | 2 |
+| 校园网 | 2 |
+| 后勤开水机 | 2 |
+| 百景 | 10 |
+
+
+
+2、各个窗口的食用情况 top 10
+
+| 窗口 | 消费次数 |
+|:--------------------:|:-------:|
+| 东一二楼华科速7 | 58 |
+| 东一二楼大众菜(二) | 60 |
+| 自助售货机 | 58 |
+| 东一二楼香霸王卤肉饭 | 30 |
+| 东一一楼蒸点稀食 | 101 |
+| 东一二楼特色菜品 | 55 |
+| 东学超市柜三 | 2 |
+| 东学超市柜一 | 6 |
+| 东一二楼湘味小钵 | 81 |
+| 东一一楼烧腊饭 | 7 |
+
+
+
+3、各时间段的食用情况
+
+
+
+4、各个月的食用情况
+
+
+
+5、总体概况
+
+在 2020,你连续在 东一二楼大众菜(一) 窗口消费了 4 次,看来你很喜欢这个窗口!
+
+在 2020,你一共消费了 682 次,共花费 3331.35 元!
+
+
+
+## 如何使用
+
+运行 `ecard.py` 前需要登录拿到 `JSESSIONID` 然后填到 `ecard.py` 对应位置;画图前记得解压字体文件:`SourceHanSansCN-Light.7z`,也可以自定义字体,修改 `utils.py` 中的 `myfont = FontProperties(fname="SourceHanSansCN-Light.otf")` 即可。
+
+```Bash
+git clone git@github.com:taseikyo/hust-ecard-annual.git
+cd hust-ecard-annual
+pip3 install -r requirements.txt
+# 获取数据
+python3 ecard.py
+# 画图
+python3 utils.py
+```
+
+由于校园卡消费记录里记的是食堂窗口,为了从中提取出食堂,我设了几个食堂关键词:
+
+```
+HALLS = {
+ "东一",
+ "集贤楼",
+ "自助售货机",
+ "西一",
+ "百品屋",
+ "集锦园",
+ "校园网",
+ "图书馆",
+ "东学超市",
+ "东三",
+ "紫荆园",
+ "百景",
+ "喻园",
+}
+```
+
+当然这些肯定是不全的,毕竟一共有三十多个食堂,所以在代码里,窗口名不包含任何 `HALLS` 关键词的记录会直接以窗口名本身计数。
+
+```Python
+has_found = False
+for hall in HALLS:
+ if row[-1].find(hall) >= 0:
+ has_found = True
+ halls[hall] += 1
+ break
+if not has_found:
+ halls[row[-1]] += 1
+```
+
+## LICENSE
+
+Copyright (c) 2019 Lewis Tian. Licensed under the MIT license.
diff --git a/SourceHanSansCN-Light.7z b/SourceHanSansCN-Light.7z
new file mode 100755
index 0000000..21c5544
Binary files /dev/null and b/SourceHanSansCN-Light.7z differ
diff --git a/csv/2019.7z b/csv/2019.7z
new file mode 100755
index 0000000..40b454f
Binary files /dev/null and b/csv/2019.7z differ
diff --git a/csv/2020.7z b/csv/2020.7z
new file mode 100755
index 0000000..47ab0ce
Binary files /dev/null and b/csv/2020.7z differ
diff --git a/ecard.py b/ecard.py
new file mode 100755
index 0000000..4e67775
--- /dev/null
+++ b/ecard.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Date : 2019-12-23 14:48:11
+# @Author : Lewis Tian (taseikyo@gmail.com)
+# @Link : github.com/taseikyo
+# @Version : python3.8
+
+"""
+Retrieve and save my ecard consumption details.
+"""
+
+import csv
+import random
+import time
+
+import requests
+
+# Rows ([e_time, e_money, e_hall]) collected for the month currently being
+# crawled: one_moonth() appends to this list, dump_as_csv() writes it out and
+# rebinds it to an empty list.
+DETAIL = []
+
+
+def one_moonth(year=2019, month=12, page=1, total=0):
+ global DETAIL
+ url = "http://218.199.85.15/pcard/gettrjndataList.action"
+ headers = {
+ "Cookie": "JSESSIONID={xxx}",
+ "Referer": "http://218.199.85.15/pcard/pcard/acchistrjn.action",
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36",
+ "X-Requested-With": "XMLHttpRequest",
+ }
+ post_data = {
+ "page": f"{page}",
+ "rp": "10",
+ "sortname": "jndatetime",
+ "sortorder": "desc",
+ "query": "",
+ "qtype": "",
+ "accquary": "215799",
+ "trjnquary": f"{year}-{month:02}",
+ }
+
+ print(f"retrieve {year}-{month} page {page} data...")
+ r = requests.post(url, headers=headers, data=post_data)
+ data = r.json()
+
+ for row in data["rows"]:
+ # bank card transfer
+ if row["cell"][5] == "0":
+ continue
+ e_time = row["cell"][0]
+ e_money = row["cell"][3][1:]
+ e_hall = row["cell"][8].strip()
+ temp = [e_time, e_money, e_hall]
+ DETAIL.append(temp)
+
+ total += 10
+ if total < data["total"]:
+ time.sleep(random.randint(1000, 2000) / 1000)
+ one_moonth(year, month, page + 1, total)
+ else:
+ dump_as_csv(year, month)
+
+
+def dump_as_csv(year, month):
+ global DETAIL
+ print(f"save {year}-{month} data...")
+ with open(f"csv/{year}-{month}.csv", "w", encoding="utf-8", newline="") as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(["e_time", "e_money", "e_hall"])
+ writer.writerows(DETAIL)
+ DETAIL = []
+
+
+if __name__ == "__main__":
+ for month in range(1, 13):
+ one_moonth(2020, month=month)
diff --git a/images/TIM20191223152311.png b/images/TIM20191223152311.png
new file mode 100755
index 0000000..47894d5
Binary files /dev/null and b/images/TIM20191223152311.png differ
diff --git a/images/consume_times.png b/images/consume_times.png
new file mode 100755
index 0000000..ac9c9db
Binary files /dev/null and b/images/consume_times.png differ
diff --git a/images/consume_times_2020.png b/images/consume_times_2020.png
new file mode 100755
index 0000000..f24da1c
Binary files /dev/null and b/images/consume_times_2020.png differ
diff --git a/images/hall_times.png b/images/hall_times.png
new file mode 100755
index 0000000..9d29d4e
Binary files /dev/null and b/images/hall_times.png differ
diff --git a/images/hall_times_2020.png b/images/hall_times_2020.png
new file mode 100755
index 0000000..1dc0d61
Binary files /dev/null and b/images/hall_times_2020.png differ
diff --git a/images/hours_2020.png b/images/hours_2020.png
new file mode 100755
index 0000000..bb0b825
Binary files /dev/null and b/images/hours_2020.png differ
diff --git a/images/hust.jpg b/images/hust.jpg
new file mode 100755
index 0000000..4935da1
Binary files /dev/null and b/images/hust.jpg differ
diff --git a/images/record.gif b/images/record.gif
new file mode 100755
index 0000000..045c9db
Binary files /dev/null and b/images/record.gif differ
diff --git a/images/windows_times.png b/images/windows_times.png
new file mode 100755
index 0000000..4696641
Binary files /dev/null and b/images/windows_times.png differ
diff --git a/images/windows_times_2020.png b/images/windows_times_2020.png
new file mode 100755
index 0000000..173aa04
Binary files /dev/null and b/images/windows_times_2020.png differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100755
index 0000000..1131ee3
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+requests
+matplotlib
+pandas
+seaborn
\ No newline at end of file
diff --git a/utils.py b/utils.py
new file mode 100755
index 0000000..3296399
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Date : 2019-12-23 15:41:06
+# @Author : Lewis Tian (taseikyo@gmail.com)
+# @Link : github.com/taseikyo
+# @Version : python3.8
+
+import calendar
+import csv
+import glob
+from collections import defaultdict
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+from matplotlib.font_manager import FontProperties
+
+myfont = FontProperties(fname="SourceHanSansCN-Light.otf")
+
+sns.set(
+ style="ticks",
+ font=myfont.get_name(),
+ rc={
+ "figure.figsize": [16, 9],
+ "text.color": "white",
+ "axes.labelcolor": "white",
+ "axes.edgecolor": "white",
+ "xtick.color": "white",
+ "ytick.color": "white",
+ "axes.facecolor": "#443941",
+ "figure.facecolor": "#443941",
+ },
+)
+
+# Keywords used to map a window name to its canteen/location (substring
+# match). The set is incomplete; add further keywords by hand as needed.
+HALLS = {
+ "东一",
+ "集贤楼",
+ "自助售货机",
+ "西一",
+ "百品屋",
+ "集锦园",
+ "校园网",
+ "图书馆",
+ "东学超市",
+ "东三",
+ "紫荆园",
+ "百景",
+ "喻园",
+}
+
+
+def merge_all_files(year=2019):
+ """
+ 汇总所有月份的 csv
+ """
+ files = glob.glob("csv/*.csv")
+ df = pd.concat([pd.read_csv(file) for file in files])
+ df.to_csv(f"csv/{year}.csv", index=False, encoding="utf-8-sig")
+
+
+def draw_consume_times(year=2019):
+ """
+ 按月份显示消费次数
+ """
+ times = {}
+ for x in range(1, 13):
+ with open(f"csv/{year}-{x}.csv", encoding="utf-8") as f:
+ lines = f.readlines()
+ times[x] = len(lines) - 1
+ print(times)
+ plt.figure(figsize=(16, 6))
+ plt.plot(list(times.keys()), list(times.values()), label="消费次数", color="white")
+ plt.legend()
+ # 图上画出数据
+ x = range(1, len(times) + 1)
+ y_text = list(times.values())
+ for i in range(len(times)):
+ plt.text(x[i], y_text[i] + 2, y_text[i], ha="center", fontsize=12)
+
+ plt.grid(False)
+ plt.xlabel("月份", fontsize=16)
+ plt.ylabel("次数", fontsize=16)
+ plt.xticks(range(14), [""] + calendar.month_name[1:13] + [""])
+ plt.title("每月的消费次数", fontsize=20)
+ plt.show()
+
+
+def get_all_windows_halls(year=2019):
+ """
+ 食堂窗口 & 食堂
+ """
+ halls = defaultdict(int)
+ windows = defaultdict(int)
+ for x in range(1, 13):
+ with open(f"csv/{year}-{x}.csv", encoding="utf-8") as f:
+ next(f)
+ reader = csv.reader(f)
+ for row in reader:
+ windows[row[-1]] += 1
+ has_found = False
+ for hall in HALLS:
+ if row[-1].find(hall) >= 0:
+ has_found = True
+ halls[hall] += 1
+ break
+ if not has_found:
+ halls[row[-1]] += 1
+ print(halls)
+ with open("csv/halls.csv", "w", encoding="utf-8", newline="") as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(["e_hall", "e_count"])
+ writer.writerows(halls.items())
+ with open("csv/windows.csv", "w", encoding="utf-8", newline="") as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(["e_windows", "e_count"])
+ writer.writerows(windows.items())
+
+
+def draw_bars(path, title, colname, rotate=0):
+ df = pd.read_csv(path)
+ df = df.sort_values("e_count", ascending=False)
+ print(df.head(10))
+ plt.figure(figsize=(16, 6))
+ plt.bar(df[f"{colname}"], df.e_count, label="消费次数", color="white")
+ plt.legend()
+ # plt.grid(False)
+ # 图上画出数据
+ x = range(len(df.e_count) + 1)
+ y_text = list(df.e_count)
+ for i in range(len(df.e_count)):
+ plt.text(x[i], y_text[i] + 2, y_text[i], ha="center", fontsize=12)
+ plt.xlabel(title, fontsize=16)
+ plt.ylabel("次数", fontsize=16)
+ plt.xticks(rotation=rotate)
+ plt.title(f"{title}的消费次数", fontsize=20)
+ plt.tight_layout()
+ plt.show()
+
+
+def draw_hour_times(year=2019):
+ hours = defaultdict(int)
+ with open(f"csv/{year}.csv", encoding="utf-8") as f:
+ next(f)
+ for line in f:
+ hour = int(line.split(":")[0].split(" ")[1])
+ hours[hour] += 1
+ print(hours)
+ plt.figure(figsize=(16, 6))
+ x, y = [], []
+ for i, j in sorted(hours.items(), key=lambda x: x[0]):
+ x.append(i)
+ y.append(j)
+ print(x, y)
+ plt.plot(x, y, label="消费次数", color="white")
+ plt.xlabel("小时", fontsize=16)
+ plt.ylabel("次数", fontsize=16)
+ plt.title("每时间段的消费次数", fontsize=20)
+ plt.xticks(range(x[0], x[-1]+1))
+ # 图上画出数据
+ for i in range(len(hours)):
+ plt.text(x[i]+0.1, y[i] + 2, y[i], ha="center", fontsize=12)
+ plt.show()
+
+def max_continue_times(year=2019):
+ """
+ 最大连续次数
+ """
+ max_time = 0
+ cur_time = 1
+ max_hall = None
+ pre_element = None
+ with open(f"csv/{year}.csv", encoding="utf-8") as f:
+ next(f)
+ reader = csv.reader(f)
+ for row in reader:
+ if row[2] == pre_element:
+ cur_time += 1
+ if max_time < cur_time:
+ max_time = cur_time
+ max_hall = pre_element
+ else:
+ pre_element = row[2]
+ cur_time = 1
+ print(f"在 {year},你连续在 {max_hall} 窗口消费了 {max_time} 次,看来你很喜欢这个窗口!")
+
+
+def get_total_money_time(year=2019):
+ """
+ 总消费次数、钱数
+ """
+ df = pd.read_csv(f"csv/{year}.csv")
+ print(f"在 {year},你一共消费了 {len(df)} 次,共花费 {df.e_money.sum()} 元!")
+
+
+if __name__ == "__main__":
+ merge_all_files(2020)
+ draw_consume_times(2020)
+ get_all_windows_halls(2020)
+ draw_bars("csv/halls.csv", "食堂", "e_hall")
+ draw_bars("csv/windows.csv", "食堂窗口", "e_windows", 35)
+ draw_hour_times(2020)
+ max_continue_times(2020)
+ get_total_money_time(2020)