-
Notifications
You must be signed in to change notification settings - Fork 119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add Connection interface #1374
Changes from all commits
ca33f25
517e287
a5b7734
f299562
5ee68b4
ac33915
427e82d
90a238d
b1dd598
12ad71c
7651405
403c6ef
d93d183
2989d32
6ba31b7
e6a480b
a4ec57f
c593545
dede418
6c79796
4156217
07885e6
109d123
1a582f3
b8e39fa
7bd4e7b
6748795
2cb8099
c04e620
74f177d
58ed6d2
5fad3e5
f42e1ac
73e05fd
86bdf13
512ee95
bcf6dfd
1d900b5
40a9226
26d55e3
8c14392
ca9b7d4
8e683e7
0e8b973
fd9122a
af0b8f7
8d65193
e8f1e71
26bc6bb
4aea35a
6816ada
a9c47ef
b738b57
b82d831
2a34bc6
0ca0724
d911ea7
5e5666a
49959a5
66e947e
4daeb76
b32bbdc
171da1f
8971a27
9bc67d4
d70ad74
98c6578
4251f25
cae60ef
497a824
cec20bb
27a68d0
c3c2d14
5bbb066
92e8883
44e871c
b0bf13a
c1743c9
3297e4d
ca244eb
7cded43
33920a7
c25ae5a
f411699
39ed020
99301ef
19318bb
0fb2906
8714d89
d1e3b64
30cbd57
f0dd594
820b454
661dbf5
b4cfb2d
3191005
92a7b59
77db382
1f64c63
d7cbe09
2c01cf6
999df03
b9bdd85
3986358
4248a7f
b339e4a
87e3f9e
b7076e7
45440de
10581dc
d77bb27
a8b63d2
8423576
bd746ee
d7f3de0
9cb0527
ca2e27c
3eb7bc4
03cc9ea
26d880e
bbe20d5
7986f69
e049c38
44b9f32
708fa53
90ce594
44f7dc2
f427725
4c7bfc7
82f6745
78d691c
f24e4c2
69de2e8
4fa2f96
5eaa7b6
a7f20ed
01f6a18
1218d2f
5675160
e15c1cf
f407e1b
52f23e1
f77a791
dece7ac
e2f3fc8
e8b2705
0efcd96
f303962
fb77468
9486217
dcbd33e
5a20d42
384dece
1f22d4f
7f18b6a
78903db
f989b3c
7dfcda1
abe9b7e
00c9e40
917b3ea
d864514
1d262dc
ac03e54
27a0304
e33f705
c9b02ad
68b6acf
e5be724
c6f6883
ffa3be0
bcdaa6d
eb37332
1b03be8
48d99d5
8cbef78
251eace
28d620c
d8af94f
57fd5e4
78259c5
9b8db32
25eb4e5
e7b4cb6
1a7e512
6a1f69d
15b5fae
9191dbb
350767c
b071123
9ebf5e7
6c2a9f9
11c156c
ad75b17
274b4b7
dfa5fd6
69014c4
5d59bb5
90a767a
409deb9
b5a0899
7ff528f
f52620e
d77320d
6a7875e
b174634
849d22b
91af825
5689ec0
0e4122e
6e9083a
17bbe83
e402471
4f2c3d7
4078b9f
684a9da
f4e2208
439e0ae
c3e5b67
eececc3
6c6d173
72d5cbf
74b4c65
a9c5ed1
a76843b
df2208d
e1acb6d
825fc7b
cdcbe50
95b52d7
1f01fd8
fd42822
1185236
35c03ae
6179a18
2608284
e5b26c7
8c9d6e5
58a3272
da4d7f3
8f673db
50cc2a6
71dfce2
650e6a0
f4d2247
6ac85f3
adc0805
ec764e6
be30663
3a5b836
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
/* | ||
* Copyright 2022 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.google.cloud.bigquery; | ||
|
||
import java.io.IOException; | ||
import java.sql.ResultSet; | ||
import java.sql.SQLException; | ||
import java.util.concurrent.TimeUnit; | ||
import java.util.logging.Level; | ||
import org.openjdk.jmh.annotations.Benchmark; | ||
import org.openjdk.jmh.annotations.BenchmarkMode; | ||
import org.openjdk.jmh.annotations.Fork; | ||
import org.openjdk.jmh.annotations.Measurement; | ||
import org.openjdk.jmh.annotations.Mode; | ||
import org.openjdk.jmh.annotations.OutputTimeUnit; | ||
import org.openjdk.jmh.annotations.Param; | ||
import org.openjdk.jmh.annotations.Scope; | ||
import org.openjdk.jmh.annotations.Setup; | ||
import org.openjdk.jmh.annotations.State; | ||
import org.openjdk.jmh.annotations.Warmup; | ||
import org.openjdk.jmh.infra.Blackhole; | ||
import org.openjdk.jmh.runner.Runner; | ||
import org.openjdk.jmh.runner.options.Options; | ||
import org.openjdk.jmh.runner.options.OptionsBuilder; | ||
|
||
@Fork(value = 1) | ||
@BenchmarkMode(Mode.AverageTime) | ||
@Warmup(iterations = 1) | ||
@Measurement(iterations = 3) | ||
@State(Scope.Benchmark) | ||
@OutputTimeUnit(TimeUnit.MILLISECONDS) | ||
public class ConnImplBenchmark { | ||
@Param({"500000", "1000000", "10000000", "100000000"}) // 500K, 1M, 10M, and 100M | ||
public int rowLimit; | ||
|
||
private ConnectionSettings connectionSettingsReadAPIEnabled, connectionSettingsReadAPIDisabled; | ||
private long numBuffRows = 100000L; | ||
private final String DATASET = "new_york_taxi_trips"; | ||
private final String QUERY = | ||
"SELECT * FROM bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2017 LIMIT %s"; | ||
public static final long NUM_PAGE_ROW_CNT_RATIO = | ||
10; // ratio of [records in the current page :: total rows] to be met to use read API | ||
public static final long NUM_MIN_RESULT_SIZE = | ||
200000; // min number of records to use to ReadAPI with | ||
|
||
@Setup | ||
public void setUp() throws IOException { | ||
java.util.logging.Logger.getGlobal().setLevel(Level.ALL); | ||
ReadClientConnectionConfiguration clientConnectionConfiguration; | ||
|
||
clientConnectionConfiguration = | ||
ReadClientConnectionConfiguration.newBuilder() | ||
.setTotalToPageRowCountRatio(NUM_PAGE_ROW_CNT_RATIO) | ||
.setMinResultSize(NUM_MIN_RESULT_SIZE) | ||
.setBufferSize(numBuffRows) | ||
.build(); | ||
|
||
connectionSettingsReadAPIEnabled = | ||
ConnectionSettings.newBuilder() | ||
.setDefaultDataset(DatasetId.of(DATASET)) | ||
.setNumBufferedRows(numBuffRows) // page size | ||
.setPriority( | ||
QueryJobConfiguration.Priority | ||
.INTERACTIVE) // DEFAULT VALUE - so that isFastQuerySupported returns false | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume this is because Either way, this seems like a fragile way of disabling this feature. Perhaps a more explicit way of turning it on? In Python, I was planning on using the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah this is our way to avoid using |
||
.setReadClientConnectionConfiguration(clientConnectionConfiguration) | ||
.setUseReadAPI(true) // enable read api | ||
.build(); | ||
connectionSettingsReadAPIDisabled = | ||
ConnectionSettings.newBuilder() | ||
.setDefaultDataset(DatasetId.of(DATASET)) | ||
.setNumBufferedRows(numBuffRows) // page size | ||
.setPriority( | ||
QueryJobConfiguration.Priority | ||
.INTERACTIVE) // so that isFastQuerySupported returns false | ||
.setReadClientConnectionConfiguration(clientConnectionConfiguration) | ||
.setUseReadAPI(false) // disable read api | ||
.build(); | ||
} | ||
|
||
@Benchmark | ||
public void iterateRecordsUsingReadAPI(Blackhole blackhole) | ||
throws InterruptedException, BigQuerySQLException { | ||
Connection connectionReadAPIEnabled = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (random train of thought, most likely not actionable, but might be interesting to see if it changes your results any, since creating service instances can get expensive) I had some (never submitted into the codebase, but used before launch) Spanner R2DBC vs Client Library benchmarks where creation of connection factory and client library options/Spanner client was not important, but query performance was, so I used state there. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 It's a good suggestion to move connection creation logic out since we care only about query performance here (and hopefully this could even improve the benchmark results). @prash-mi There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can definitely try excluding the connection initialisation times using the technique mentioned, I will update the benchmarks There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's do this in a follow-up PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. #2015 to track this work. |
||
BigQueryOptions.getDefaultInstance() | ||
.getService() | ||
.createConnection(connectionSettingsReadAPIEnabled); | ||
String selectQuery = String.format(QUERY, rowLimit); | ||
long hash = 0L; | ||
try { | ||
BigQueryResultSet bigQueryResultSet = connectionReadAPIEnabled.executeSelect(selectQuery); | ||
hash = getResultHash(bigQueryResultSet); | ||
} catch (Exception e) { | ||
e.printStackTrace(); | ||
} finally { | ||
connectionReadAPIEnabled.close(); // IMP to kill the bg workers | ||
} | ||
blackhole.consume(hash); | ||
} | ||
|
||
@Benchmark | ||
public void iterateRecordsWithoutUsingReadAPI(Blackhole blackhole) | ||
throws InterruptedException, BigQuerySQLException { | ||
Connection connectionReadAPIDisabled = | ||
BigQueryOptions.getDefaultInstance() | ||
.getService() | ||
.createConnection(connectionSettingsReadAPIDisabled); | ||
String selectQuery = String.format(QUERY, rowLimit); | ||
long hash = 0L; | ||
try { | ||
BigQueryResultSet bigQueryResultSet = connectionReadAPIDisabled.executeSelect(selectQuery); | ||
hash = getResultHash(bigQueryResultSet); | ||
} catch (Exception e) { | ||
e.printStackTrace(); | ||
} finally { | ||
connectionReadAPIDisabled.close(); // IMP to kill the bg workers | ||
} | ||
blackhole.consume(hash); | ||
} | ||
|
||
// Hashes all the 20 columns of all the rows | ||
stephaniewang526 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
private long getResultHash(BigQueryResultSet bigQueryResultSet) throws SQLException { | ||
ResultSet rs = bigQueryResultSet.getResultSet(); | ||
long hash = 0L; | ||
int cnt = 0; | ||
System.out.print("\n Running"); | ||
while (rs.next()) { | ||
hash += rs.getString("vendor_id") == null ? 0 : rs.getString("vendor_id").hashCode(); | ||
hash += | ||
rs.getString("pickup_datetime") == null ? 0 : rs.getString("pickup_datetime").hashCode(); | ||
hash += | ||
rs.getString("dropoff_datetime") == null | ||
? 0 | ||
: rs.getString("dropoff_datetime").hashCode(); | ||
hash += rs.getLong("passenger_count"); | ||
hash += rs.getDouble("trip_distance"); | ||
hash += rs.getDouble("pickup_longitude"); | ||
hash += rs.getDouble("pickup_latitude"); | ||
hash += rs.getString("rate_code") == null ? 0 : rs.getString("rate_code").hashCode(); | ||
hash += | ||
rs.getString("store_and_fwd_flag") == null | ||
? 0 | ||
: rs.getString("store_and_fwd_flag").hashCode(); | ||
hash += rs.getDouble("dropoff_longitude"); | ||
hash += rs.getDouble("dropoff_latitude"); | ||
hash += rs.getString("payment_type") == null ? 0 : rs.getString("payment_type").hashCode(); | ||
hash += rs.getDouble("fare_amount"); | ||
hash += rs.getDouble("extra"); | ||
hash += rs.getDouble("mta_tax"); | ||
hash += rs.getDouble("tip_amount"); | ||
hash += rs.getDouble("tolls_amount"); | ||
hash += rs.getDouble("imp_surcharge"); | ||
hash += rs.getDouble("total_amount"); | ||
hash += | ||
rs.getString("pickup_location_id") == null | ||
? 0 | ||
: rs.getString("pickup_location_id").hashCode(); | ||
hash += | ||
rs.getString("dropoff_location_id") == null | ||
? 0 | ||
: rs.getString("dropoff_location_id").hashCode(); | ||
if (++cnt % 100000 == 0) { // just to indicate the progress while long running benchmarks | ||
System.out.print("."); | ||
} | ||
} | ||
return hash; | ||
} | ||
|
||
public static void main(String[] args) throws Exception { | ||
Options opt = new OptionsBuilder().include(ConnImplBenchmark.class.getSimpleName()).build(); | ||
new Runner(opt).run(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!-- see http://www.mojohaus.org/clirr-maven-plugin/examples/ignored-differences.html --> | ||
<differences> | ||
<!-- TODO: REMOVE AFTER RELEASE --> | ||
stephaniewang526 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
<difference> | ||
<differenceType>7012</differenceType> | ||
<className>com/google/cloud/bigquery/BigQuery</className> | ||
<method>com.google.cloud.bigquery.Connection createConnection(com.google.cloud.bigquery.ConnectionSettings)</method> | ||
</difference> | ||
<difference> | ||
<differenceType>7012</differenceType> | ||
<className>com/google/cloud/bigquery/BigQuery</className> | ||
<method>com.google.cloud.bigquery.Connection createConnection()</method> | ||
</difference> | ||
<difference> | ||
<differenceType>7012</differenceType> | ||
<className>com/google/cloud/bigquery/spi/v2/BigQueryRpc</className> | ||
<method>com.google.api.services.bigquery.model.Job createJobForQuery(com.google.api.services.bigquery.model.Job)</method> | ||
</difference> | ||
<difference> | ||
<differenceType>7012</differenceType> | ||
<className>com/google/cloud/bigquery/spi/v2/BigQueryRpc</className> | ||
<method>com.google.api.services.bigquery.model.Job getQueryJob(java.lang.String, java.lang.String, java.lang.String)</method> | ||
</difference> | ||
<difference> | ||
<differenceType>7012</differenceType> | ||
<className>com/google/cloud/bigquery/spi/v2/BigQueryRpc</className> | ||
<method>com.google.api.services.bigquery.model.GetQueryResultsResponse getQueryResultsWithRowLimit(java.lang.String, java.lang.String, java.lang.String, java.lang.Integer)</method> | ||
</difference> | ||
<difference> | ||
<differenceType>7012</differenceType> | ||
<className>com/google/cloud/bigquery/spi/v2/BigQueryRpc</className> | ||
<method>com.google.api.services.bigquery.model.TableDataList listTableDataWithRowLimit(java.lang.String, java.lang.String, java.lang.String, java.lang.Integer, java.lang.String)</method> | ||
</difference> | ||
</differences> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd be curious to see how you plan on this client-side connection settings object interacting with the "session" / "connectionProperties" API (if at all).
In Python, I was hoping to use our "connection" object to include a way of automatically passing the session ID along automatically.
See: https://cloud.google.com/bigquery/docs/sessions-run-queries and googleapis/python-bigquery#971
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps worth filing a similar issue for Java, if populating
connectionProperties
isn't included in this version. I think it'd be the best user experience if a session was automatically created and passed along by the driver.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We have both
session
andconnectionProperties
inConnectionSettings
already:connectionProperties:
setCreateSession: