From b5b34de45f98d216153494eea07b1620b5c343c2 Mon Sep 17 00:00:00 2001
From: Mark Raasveldt <mark.raasveldt@gmail.com>
Date: Tue, 15 Nov 2022 10:02:06 +0100
Subject: [PATCH 1/3] Update DuckDB to v0.6.0

---
 duckdb/README.md    |  2 --
 duckdb/benchmark.sh |  2 +-
 duckdb/create.sql   |  3 +--
 duckdb/load.py      | 16 +++++++++++-----
 duckdb/queries.sql  |  4 ++--
 duckdb/query.py     |  7 ++-----
 6 files changed, 17 insertions(+), 17 deletions(-)
 delete mode 100644 duckdb/README.md

diff --git a/duckdb/README.md b/duckdb/README.md
deleted file mode 100644
index d2d7b22c8..000000000
--- a/duckdb/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-DuckDB cannot load parquet file due to OOM.
-The only option is to load a CSV file, but sometimes it also fails with OOM.
diff --git a/duckdb/benchmark.sh b/duckdb/benchmark.sh
index 392f084c5..e0007dd6e 100755
--- a/duckdb/benchmark.sh
+++ b/duckdb/benchmark.sh
@@ -12,7 +12,7 @@ wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz'
 gzip -d hits.csv.gz
 
 ./load.py
-# 4216.5390389899985 seconds
+# 414 seconds
 
 # Run the queries
 
diff --git a/duckdb/create.sql b/duckdb/create.sql
index 744d595ec..4d23eaac6 100644
--- a/duckdb/create.sql
+++ b/duckdb/create.sql
@@ -104,6 +104,5 @@ CREATE TABLE hits
     HasGCLID SMALLINT NOT NULL,
     RefererHash BIGINT NOT NULL,
     URLHash BIGINT NOT NULL,
-    CLID INTEGER NOT NULL,
-    PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID)
+    CLID INTEGER NOT NULL
 );
diff --git a/duckdb/load.py b/duckdb/load.py
index d4265d15f..5b581a83f 100755
--- a/duckdb/load.py
+++ b/duckdb/load.py
@@ -5,14 +5,20 @@
 import psutil
 
 con = duckdb.connect(database="my-db.duckdb", read_only=False)
-# See https://github.com/duckdb/duckdb/issues/3969
-con.execute("PRAGMA memory_limit='{}b'".format(psutil.virtual_memory().total / 4))
-con.execute("PRAGMA threads={}".format(psutil.cpu_count(logical=False)))
 
-print("Will load the data")
 
+# enable the progress bar
+con.execute('PRAGMA enable_progress_bar')
+con.execute('PRAGMA enable_print_progress_bar;')
+# enable parallel CSV loading
+con.execute("SET experimental_parallel_csv=true")
+# disable preservation of insertion order
+con.execute("SET preserve_insertion_order=false")
+
+# perform the actual load
+print("Will load the data")
 start = timeit.default_timer()
 con.execute(open("create.sql").read())
-con.execute("INSERT INTO hits SELECT * FROM read_csv_auto('hits.csv')")
+con.execute("COPY hits FROM 'hits.csv'")
 end = timeit.default_timer()
 print(end - start)
diff --git a/duckdb/queries.sql b/duckdb/queries.sql
index 31f65fc89..b4115ee3a 100644
--- a/duckdb/queries.sql
+++ b/duckdb/queries.sql
@@ -25,8 +25,8 @@ SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;
 SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10;
 SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10;
 SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10;
-SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
-SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
+SELECT CounterID, AVG(STRLEN(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
+SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(STRLEN(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
 SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits;
 SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
 SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
diff --git a/duckdb/query.py b/duckdb/query.py
index 43739be56..85b9eeb15 100755
--- a/duckdb/query.py
+++ b/duckdb/query.py
@@ -9,12 +9,9 @@
 print(query)
 
 con = duckdb.connect(database="my-db.duckdb", read_only=False)
-# See https://github.com/duckdb/duckdb/issues/3969
-con.execute("PRAGMA memory_limit='{}b'".format(psutil.virtual_memory().total / 4))
-con.execute("PRAGMA threads={}".format(psutil.cpu_count(logical=False)))
-
 for try_num in range(3):
     start = timeit.default_timer()
-    con.execute(query)
+    results = con.execute(query).fetchall()
     end = timeit.default_timer()
     print(end - start)
+    del results

From 315e43414ebded693bc22c0d0e520e2915c40280 Mon Sep 17 00:00:00 2001
From: Mark Raasveldt <mark.raasveldt@gmail.com>
Date: Tue, 15 Nov 2022 11:05:46 +0100
Subject: [PATCH 2/3] Update DuckDB results to v0.6.0

---
 duckdb/results/c6a.4xlarge.json | 94 ++++++++++++++++-----------------
 1 file changed, 46 insertions(+), 48 deletions(-)

diff --git a/duckdb/results/c6a.4xlarge.json b/duckdb/results/c6a.4xlarge.json
index 1ebe0d129..be8635332 100644
--- a/duckdb/results/c6a.4xlarge.json
+++ b/duckdb/results/c6a.4xlarge.json
@@ -1,58 +1,56 @@
 {
     "system": "DuckDB",
-    "date": "2022-07-01",
+    "date": "2022-11-15",
     "machine": "c6a.4xlarge, 500gb gp2",
     "cluster_size": 1,
-    "comment": "Many queries triggered OOM",
 
     "tags": ["C++", "column-oriented", "embedded"],
 
-    "load_time": 4217,
-    "data_size": 27241492480,
+    "load_time": 416,
+    "data_size": 25024802816,
 
     "result": [
-[0.005694353996659629,0.003944558004150167,0.003837226002360694],
-[0.16991353100456763,0.03919722700084094,0.03835860399703961],
-[0.44898432699847035,0.04947217500011902,0.04852217998995911],
-[0.07586832098604646,0.07051395199960098,0.07007493599667214],
-[9.554053236002801,8.153356187991449,8.73448242500308],
-[7.66042533799191,6.931124911992811,7.103380946995458],
-[0.030703739990713075,0.027668555994750932,0.027583695002249442],
-[0.1778664360026596,0.03942437999648973,0.03882004099432379],
-[8.53439180701389,8.869582625004114,9.020313234999776],
-[10.40215514000738,11.125320470004226,8.941559945000336],
-[1.1747649609897053,1.04221136700653,1.004799570000614],
-[1.2380354650085792,1.1211603130068397,2.4278587239969056],
-[3.1751541379926493,0.9360461989999749,0.8868292279948946],
-[6.855684430003748,7.300301584007684,5.712960822012974],
-[3.70588762400439,1.0249276379909134,0.9473389159975341],
-[2.1037107890006155,1.6215517020027619,1.5671920729946578],
-[null,null,null],
-[null,null,null],
-[null,null,null],
-[0.0002772739971987903,0.00016792300448287278,0.0001574420020915568],
-[null,null,null],
-[null,null,null],
-[null,null,null],
-[null,null,null],
-[2.9310110910009826,0.19020285899750888,0.1736805049877148],
-[2.939304119994631,0.18754731099761557,0.18073286200524308],
-[2.8706370779982535,0.18822155400994234,0.17905898999015335],
-[null,null,null],
-[null,null,null],
-[0.884408778991201,0.714329167996766,0.7135983259940986],
-[5.3762675570033025,0.8803737630078103,0.8728962720051641],
-[7.249190265996731,2.9648747390019707,2.866687831003219],
-[null,null,null],
-[null,null,null],
-[null,null,null],
-[4.515183198003797,4.030519469000865,4.014251719010645],
-[0.11604027298744768,0.040539135996368714,0.04280066800129134],
-[0.0457908230018802,0.021069509006338194,0.019683108999743126],
-[0.0680370800109813,0.011889394998434,0.01056639499438461],
-[0.22029169600864407,0.08547276000899728,0.09095505000732373],
-[0.03759863799496088,0.008373684002435766,0.007633563989656977],
-[0.025631797994719818,0.008081699008471332,0.007858585988287814],
-[0.034359957004198804,0.025543516996549442,0.02533275399764534]
-]
+[0.007988478000015675,0.004515659999924537,0.004503920000161088],
+[0.11100315900011992,0.0329610019998654,0.028879086999950232],
+[0.6478460649998397,0.05915705200004595,0.05621285400002307],
+[1.323183034000067,0.0455285499999718,0.04504826400011552],
+[1.222814291000077,0.7548531599998114,0.7589927649999026],
+[2.8619665539999914,0.7005150949999006,0.6948298479999266],
+[0.11942326999997022,0.05013811599997098,0.04905426100003751],
+[0.14966445099980774,0.034241171000076065,0.03345654100007778],
+[4.244694572000071,0.8486790140000267,0.8334544100000585],
+[3.0409637129998828,1.1786685279998892,1.1486964670000361],
+[1.2471184640000956,0.42324365099989336,0.3113840030000574],
+[4.312250543000118,0.2591428690000157,0.25120333299992126],
+[3.587442906999968,0.5870498879999104,0.5784097270000075],
+[6.9048232530001314,1.0046518700000888,0.987080990999857],
+[4.13467192600001,0.6404815850000887,0.6235957799999596],
+[2.0105650860000424,0.8113902259999577,0.78756950900015],
+[3.248947224999938,2.7703119679999872,1.6008261179999863],
+[3.2351198680000834,1.5603282849999687,1.5319727170001443],
+[10.72432485399986,3.070198415999812,3.065633552999998],
+[2.3459199960000205,0.43844037300004857,0.44263849999993],
+[22.106918537999945,0.8214904700000716,0.815211096999974],
+[22.445440309000105,0.5390162549999786,0.5336183530000653],
+[38.162022707999995,0.7763018340001508,0.7754006909999589],
+[96.78093567899987,4.105949430999999,4.102333362000081],
+[3.6950249729998177,0.1867992550000963,0.18109103799997683],
+[1.0970243589999882,0.1984391500000129,0.18246500600002946],
+[3.6618248540000877,0.19925248999993528,0.185544137000079],
+[21.191481531999898,0.478393307000033,0.4700544350000655],
+[14.240373419999969,3.5203460679997534,3.638875715999802],
+[0.8780410869999287,0.547536578999825,0.4830867450000369],
+[4.3269140569996125,0.5835747720002473,0.5723880420000569],
+[9.054497873999935,0.8204964669998844,0.8048715669997364],
+[16.009907588999795,4.723961488999976,7.069065186999978],
+[21.76933206700005,2.2042322819997935,2.213854970000284],
+[21.591168588000073,2.22991274900005,2.237306400000307],
+[1.5004984629999853,0.8202248339998732,0.8199325200002932],
+[0.1191018049999002,0.0322099319996596,0.029625235999901633],
+[0.06348333900041325,0.016720733999591175,0.01323864699998012],
+[0.08746766900003422,0.021063181000045006,0.01836599499984004],
+[0.21232219900002747,0.06138483099994119,0.05096832199978962],
+[0.06383551399994758,0.015361045000190643,0.009370135000153823],
+[0.05804349699974409,0.010943656000108604,0.008688407000136067],
+[0.04755750599997555,0.010486199999832024,0.008859258000029513],]
 }

From e29c31676faba52b61531074a11cab6d9278fe03 Mon Sep 17 00:00:00 2001
From: Mark Raasveldt <mark.raasveldt@gmail.com>
Date: Tue, 15 Nov 2022 11:12:41 +0100
Subject: [PATCH 3/3] Remove trailing comma

---
 duckdb/results/c6a.4xlarge.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/duckdb/results/c6a.4xlarge.json b/duckdb/results/c6a.4xlarge.json
index be8635332..9b977e5c7 100644
--- a/duckdb/results/c6a.4xlarge.json
+++ b/duckdb/results/c6a.4xlarge.json
@@ -52,5 +52,6 @@
 [0.21232219900002747,0.06138483099994119,0.05096832199978962],
 [0.06383551399994758,0.015361045000190643,0.009370135000153823],
 [0.05804349699974409,0.010943656000108604,0.008688407000136067],
-[0.04755750599997555,0.010486199999832024,0.008859258000029513],]
+[0.04755750599997555,0.010486199999832024,0.008859258000029513]
+]
 }