Skip to content

Commit

Permalink
Merge pull request #51 from Mytherin/duckdb060
Browse files (browse the repository at this point in the history)
Update DuckDB results to v0.6.0
  • Loading branch information
alexey-milovidov authored Nov 25, 2022
2 parents e213e74 + e29c316 commit 71d6cfe
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 64 deletions.
2 changes: 0 additions & 2 deletions duckdb/README.md

This file was deleted.

2 changes: 1 addition & 1 deletion duckdb/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz'
gzip -d hits.csv.gz

./load.py
# 4216.5390389899985 seconds
# 414 seconds

# Run the queries

Expand Down
3 changes: 1 addition & 2 deletions duckdb/create.sql
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,5 @@ CREATE TABLE hits
HasGCLID SMALLINT NOT NULL,
RefererHash BIGINT NOT NULL,
URLHash BIGINT NOT NULL,
CLID INTEGER NOT NULL,
PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID)
CLID INTEGER NOT NULL
);
16 changes: 11 additions & 5 deletions duckdb/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,20 @@
import psutil

con = duckdb.connect(database="my-db.duckdb", read_only=False)
# See https://github.com/duckdb/duckdb/issues/3969
con.execute("PRAGMA memory_limit='{}b'".format(psutil.virtual_memory().total / 4))
con.execute("PRAGMA threads={}".format(psutil.cpu_count(logical=False)))

print("Will load the data")

# enable the progress bar
con.execute('PRAGMA enable_progress_bar')
con.execute('PRAGMA enable_print_progress_bar;')
# enable parallel CSV loading
con.execute("SET experimental_parallel_csv=true")
# disable preservation of insertion order
con.execute("SET preserve_insertion_order=false")

# perform the actual load
print("Will load the data")
start = timeit.default_timer()
con.execute(open("create.sql").read())
con.execute("INSERT INTO hits SELECT * FROM read_csv_auto('hits.csv')")
con.execute("COPY hits FROM 'hits.csv'")
end = timeit.default_timer()
print(end - start)
4 changes: 2 additions & 2 deletions duckdb/queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT CounterID, AVG(STRLEN(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(STRLEN(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), 
SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits;
SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
Expand Down
7 changes: 2 additions & 5 deletions duckdb/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,9 @@
print(query)

con = duckdb.connect(database="my-db.duckdb", read_only=False)
# See https://github.com/duckdb/duckdb/issues/3969
con.execute("PRAGMA memory_limit='{}b'".format(psutil.virtual_memory().total / 4))
con.execute("PRAGMA threads={}".format(psutil.cpu_count(logical=False)))

for try_num in range(3):
start = timeit.default_timer()
con.execute(query)
results = con.execute(query).fetchall()
end = timeit.default_timer()
print(end - start)
del results
93 changes: 46 additions & 47 deletions duckdb/results/c6a.4xlarge.json
Original file line number Diff line number Diff line change
@@ -1,58 +1,57 @@
{
"system": "DuckDB",
"date": "2022-07-01",
"date": "2022-11-15",
"machine": "c6a.4xlarge, 500gb gp2",
"cluster_size": 1,
"comment": "Many queries triggered OOM",

"tags": ["C++", "column-oriented", "embedded"],

"load_time": 4217,
"data_size": 27241492480,
"load_time": 416,
"data_size": 25024802816,

"result": [
[0.005694353996659629,0.003944558004150167,0.003837226002360694],
[0.16991353100456763,0.03919722700084094,0.03835860399703961],
[0.44898432699847035,0.04947217500011902,0.04852217998995911],
[0.07586832098604646,0.07051395199960098,0.07007493599667214],
[9.554053236002801,8.153356187991449,8.73448242500308],
[7.66042533799191,6.931124911992811,7.103380946995458],
[0.030703739990713075,0.027668555994750932,0.027583695002249442],
[0.1778664360026596,0.03942437999648973,0.03882004099432379],
[8.53439180701389,8.869582625004114,9.020313234999776],
[10.40215514000738,11.125320470004226,8.941559945000336],
[1.1747649609897053,1.04221136700653,1.004799570000614],
[1.2380354650085792,1.1211603130068397,2.4278587239969056],
[3.1751541379926493,0.9360461989999749,0.8868292279948946],
[6.855684430003748,7.300301584007684,5.712960822012974],
[3.70588762400439,1.0249276379909134,0.9473389159975341],
[2.1037107890006155,1.6215517020027619,1.5671920729946578],
[null,null,null],
[null,null,null],
[null,null,null],
[0.0002772739971987903,0.00016792300448287278,0.0001574420020915568],
[null,null,null],
[null,null,null],
[null,null,null],
[null,null,null],
[2.9310110910009826,0.19020285899750888,0.1736805049877148],
[2.939304119994631,0.18754731099761557,0.18073286200524308],
[2.8706370779982535,0.18822155400994234,0.17905898999015335],
[null,null,null],
[null,null,null],
[0.884408778991201,0.714329167996766,0.7135983259940986],
[5.3762675570033025,0.8803737630078103,0.8728962720051641],
[7.249190265996731,2.9648747390019707,2.866687831003219],
[null,null,null],
[null,null,null],
[null,null,null],
[4.515183198003797,4.030519469000865,4.014251719010645],
[0.11604027298744768,0.040539135996368714,0.04280066800129134],
[0.0457908230018802,0.021069509006338194,0.019683108999743126],
[0.0680370800109813,0.011889394998434,0.01056639499438461],
[0.22029169600864407,0.08547276000899728,0.09095505000732373],
[0.03759863799496088,0.008373684002435766,0.007633563989656977],
[0.025631797994719818,0.008081699008471332,0.007858585988287814],
[0.034359957004198804,0.025543516996549442,0.02533275399764534]
[0.007988478000015675,0.004515659999924537,0.004503920000161088],
[0.11100315900011992,0.0329610019998654,0.028879086999950232],
[0.6478460649998397,0.05915705200004595,0.05621285400002307],
[1.323183034000067,0.0455285499999718,0.04504826400011552],
[1.222814291000077,0.7548531599998114,0.7589927649999026],
[2.8619665539999914,0.7005150949999006,0.6948298479999266],
[0.11942326999997022,0.05013811599997098,0.04905426100003751],
[0.14966445099980774,0.034241171000076065,0.03345654100007778],
[4.244694572000071,0.8486790140000267,0.8334544100000585],
[3.0409637129998828,1.1786685279998892,1.1486964670000361],
[1.2471184640000956,0.42324365099989336,0.3113840030000574],
[4.312250543000118,0.2591428690000157,0.25120333299992126],
[3.587442906999968,0.5870498879999104,0.5784097270000075],
[6.9048232530001314,1.0046518700000888,0.987080990999857],
[4.13467192600001,0.6404815850000887,0.6235957799999596],
[2.0105650860000424,0.8113902259999577,0.78756950900015],
[3.248947224999938,2.7703119679999872,1.6008261179999863],
[3.2351198680000834,1.5603282849999687,1.5319727170001443],
[10.72432485399986,3.070198415999812,3.065633552999998],
[2.3459199960000205,0.43844037300004857,0.44263849999993],
[22.106918537999945,0.8214904700000716,0.815211096999974],
[22.445440309000105,0.5390162549999786,0.5336183530000653],
[38.162022707999995,0.7763018340001508,0.7754006909999589],
[96.78093567899987,4.105949430999999,4.102333362000081],
[3.6950249729998177,0.1867992550000963,0.18109103799997683],
[1.0970243589999882,0.1984391500000129,0.18246500600002946],
[3.6618248540000877,0.19925248999993528,0.185544137000079],
[21.191481531999898,0.478393307000033,0.4700544350000655],
[14.240373419999969,3.5203460679997534,3.638875715999802],
[0.8780410869999287,0.547536578999825,0.4830867450000369],
[4.3269140569996125,0.5835747720002473,0.5723880420000569],
[9.054497873999935,0.8204964669998844,0.8048715669997364],
[16.009907588999795,4.723961488999976,7.069065186999978],
[21.76933206700005,2.2042322819997935,2.213854970000284],
[21.591168588000073,2.22991274900005,2.237306400000307],
[1.5004984629999853,0.8202248339998732,0.8199325200002932],
[0.1191018049999002,0.0322099319996596,0.029625235999901633],
[0.06348333900041325,0.016720733999591175,0.01323864699998012],
[0.08746766900003422,0.021063181000045006,0.01836599499984004],
[0.21232219900002747,0.06138483099994119,0.05096832199978962],
[0.06383551399994758,0.015361045000190643,0.009370135000153823],
[0.05804349699974409,0.010943656000108604,0.008688407000136067],
[0.04755750599997555,0.010486199999832024,0.008859258000029513]
]
}

0 comments on commit 71d6cfe

Please sign in to comment.