Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 2715d2b

Browse files
authored May 4, 2024
fix: downgrade NoDefaultIndexError to DefaultIndexWarning (#658)
1 parent f561799 commit 2715d2b

File tree

4 files changed

+30
-22
lines changed

4 files changed

+30
-22
lines changed
 

‎bigframes/exceptions.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,5 @@ class CleanupFailedWarning(Warning):
2525
"""Bigframes failed to clean up a table resource."""
2626

2727

28-
class NoDefaultIndexError(ValueError):
29-
"""Unable to create a default index."""
28+
class DefaultIndexWarning(Warning):
29+
"""Default index may cause unexpected costs."""

‎bigframes/session/_io/bigquery/read_gbq_table.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -277,13 +277,14 @@ def get_index_cols_and_uniqueness(
277277
# resource utilization because of the default sequential index. See
278278
# internal issue 335727141.
279279
if _is_table_clustered_or_partitioned(table) and not primary_keys:
280-
raise bigframes.exceptions.NoDefaultIndexError(
280+
warnings.warn(
281281
f"Table '{str(table.reference)}' is clustered and/or "
282282
"partitioned, but BigQuery DataFrames was not able to find a "
283-
"suitable index. To avoid this error, set at least one of: "
283+
"suitable index. To avoid this warning, set at least one of: "
284284
# TODO(b/338037499): Allow max_results to override this too,
285285
# once we make it more efficient.
286-
"`index_col` or `filters`."
286+
"`index_col` or `filters`.",
287+
category=bigframes.exceptions.DefaultIndexWarning,
287288
)
288289

289290
# If there are primary keys defined, the query engine assumes these

‎tests/unit/session/test_session.py

+23-16
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import os
1818
import re
1919
from unittest import mock
20+
import warnings
2021

2122
import google.api_core.exceptions
2223
import google.cloud.bigquery
@@ -186,7 +187,7 @@ def get_table_mock(table_ref):
186187

187188

188189
@pytest.mark.parametrize("table", CLUSTERED_OR_PARTITIONED_TABLES)
189-
def test_no_default_index_error_raised_by_read_gbq(table):
190+
def test_default_index_warning_raised_by_read_gbq(table):
190191
"""Because of the windowing operation to create a default index, row
191192
filters can't push down to the clustering column.
192193
@@ -202,12 +203,12 @@ def test_no_default_index_error_raised_by_read_gbq(table):
202203
session = resources.create_bigquery_session(bqclient=bqclient)
203204
table._properties["location"] = session._location
204205

205-
with pytest.raises(bigframes.exceptions.NoDefaultIndexError):
206+
with pytest.warns(bigframes.exceptions.DefaultIndexWarning):
206207
session.read_gbq("my-project.my_dataset.my_table")
207208

208209

209210
@pytest.mark.parametrize("table", CLUSTERED_OR_PARTITIONED_TABLES)
210-
def test_no_default_index_error_not_raised_by_read_gbq_index_col_sequential_int64(
211+
def test_default_index_warning_not_raised_by_read_gbq_index_col_sequential_int64(
211212
table,
212213
):
213214
"""Because of the windowing operation to create a default index, row
@@ -224,11 +225,13 @@ def test_no_default_index_error_not_raised_by_read_gbq_index_col_sequential_int6
224225
session = resources.create_bigquery_session(bqclient=bqclient)
225226
table._properties["location"] = session._location
226227

227-
# No exception raised because we set the option allowing the default indexes.
228-
df = session.read_gbq(
229-
"my-project.my_dataset.my_table",
230-
index_col=bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64,
231-
)
228+
# No warnings raised because we set the option allowing the default indexes.
229+
with warnings.catch_warnings():
230+
warnings.simplefilter("error", bigframes.exceptions.DefaultIndexWarning)
231+
df = session.read_gbq(
232+
"my-project.my_dataset.my_table",
233+
index_col=bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64,
234+
)
232235

233236
# We expect a window operation because we specifically requested a sequential index.
234237
generated_sql = df.sql.casefold()
@@ -246,7 +249,7 @@ def test_no_default_index_error_not_raised_by_read_gbq_index_col_sequential_int6
246249
),
247250
)
248251
@pytest.mark.parametrize("table", CLUSTERED_OR_PARTITIONED_TABLES)
249-
def test_no_default_index_error_not_raised_by_read_gbq_index_col_columns(
252+
def test_default_index_warning_not_raised_by_read_gbq_index_col_columns(
250253
total_count,
251254
distinct_count,
252255
table,
@@ -270,18 +273,20 @@ def test_no_default_index_error_not_raised_by_read_gbq_index_col_columns(
270273
)
271274
table._properties["location"] = session._location
272275

273-
# No exception raised because there are columns to use as the index.
274-
df = session.read_gbq(
275-
"my-project.my_dataset.my_table", index_col=("idx_1", "idx_2")
276-
)
276+
# No warning raised because there are columns to use as the index.
277+
with warnings.catch_warnings():
278+
warnings.simplefilter("error", bigframes.exceptions.DefaultIndexWarning)
279+
df = session.read_gbq(
280+
"my-project.my_dataset.my_table", index_col=("idx_1", "idx_2")
281+
)
277282

278283
# There should be no analytic operators to prevent row filtering pushdown.
279284
assert "OVER" not in df.sql
280285
assert tuple(df.index.names) == ("idx_1", "idx_2")
281286

282287

283288
@pytest.mark.parametrize("table", CLUSTERED_OR_PARTITIONED_TABLES)
284-
def test_no_default_index_error_not_raised_by_read_gbq_primary_key(table):
289+
def test_default_index_warning_not_raised_by_read_gbq_primary_key(table):
285290
"""If a primary key is set on the table, we use that as the index column
286291
by default, no error should be raised in this case.
287292
@@ -310,8 +315,10 @@ def test_no_default_index_error_not_raised_by_read_gbq_primary_key(table):
310315
)
311316
table._properties["location"] = session._location
312317

313-
# No exception raised because there is a primary key to use as the index.
314-
df = session.read_gbq("my-project.my_dataset.my_table")
318+
# No warning raised because there is a primary key to use as the index.
319+
with warnings.catch_warnings():
320+
warnings.simplefilter("error", bigframes.exceptions.DefaultIndexWarning)
321+
df = session.read_gbq("my-project.my_dataset.my_table")
315322

316323
# There should be no analytic operators to prevent row filtering pushdown.
317324
assert "OVER" not in df.sql

‎third_party/bigframes_vendored/pandas/io/gbq.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ def read_gbq(
157157
Alias for columns, retained for backwards compatibility.
158158
159159
Raises:
160-
bigframes.exceptions.NoDefaultIndexError:
160+
bigframes.exceptions.DefaultIndexWarning:
161161
Using the default index is discouraged, such as with clustered
162162
or partitioned tables without primary keys.
163163

0 commit comments

Comments
 (0)
Failed to load comments.