- 安装部署 Databend
- 下载 ontime 数据
- 创建 ontime 表
- 创建帐户
- 修改 python 脚运行加载数据
- 测试 SQL
mkdir tsv
cd tsv
wget https://datasets.databend.rs/t_ontime/ontime_{0..9}{0..9}.tsv.gz
curl https://datasets.databend.rs/t_ontime/create_table.sql | mysql -h 127.0.0.1 -P3307 -uroot
mysql -h 127.0.0.1 -P3307 -uroot
mysql>create user 'wubx'@'%' idetified by 'wubxwubx';
mysql>grant all privileges on *.* to 'wubx'@'%';
python3 local_load.py
select version();
-- 整体行数统计
SELECT count(*) FROM ontime;
-- 区间数据扫描
SELECT DayOfWeek, count(*) AS c FROM ontime WHERE Year >= 2000 AND Year <= 2008 GROUP BY DayOfWeek ORDER BY c DESC;
select count(*) FROM ontime WHERE Year >= 2000 AND Year <= 2008;
SELECT DayOfWeek, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year >= 2000 AND Year <= 2008 GROUP BY DayOfWeek ORDER BY c DESC;
SELECT Origin, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year >= 2000 AND Year <= 2008 GROUP BY Origin ORDER BY c DESC LIMIT 10;
SELECT IATA_CODE_Reporting_Airline AS Carrier, count() as c FROM ontime WHERE DepDelay>10 AND Year = 2007 GROUP BY Carrier ORDER BY c DESC;
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(cast(DepDelay>10 as Int8))*1000 AS c3 FROM ontime WHERE Year=2007 GROUP BY Carrier ORDER BY c3 DESC;
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(cast(DepDelay>10 as Int8))*1000 AS c3 FROM ontime WHERE Year>=2000 AND Year <=2008 GROUP BY Carrier ORDER BY c3 DESC;
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay) * 1000 AS c3 FROM ontime WHERE Year >= 2000 AND Year <= 2008 GROUP BY Carrier;
-- 全表扫
SELECT Year, avg(DepDelay) FROM ontime GROUP BY Year;
SELECT Year, count(*) as c1 FROM ontime GROUP BY Year;
SELECT avg(cnt) FROM (SELECT Year,Month,count(*) AS cnt FROM ontime WHERE DepDel15=1 GROUP BY Year,Month) a;
SELECT avg(c1) FROM (SELECT Year,Month,count(*) AS c1 FROM ontime GROUP BY Year,Month) a;
SELECT OriginCityName, DestCityName, count(*) AS c FROM ontime GROUP BY OriginCityName, DestCityName ORDER BY c DESC LIMIT 10;
SELECT OriginCityName, count(*) AS c FROM ontime GROUP BY OriginCityName ORDER BY c DESC LIMIT 10;