fix: downgrade NoDefaultIndexError to DefaultIndexWarning (#658)

tswast · web-flow · commit 2715d2b4a353 · 2024-05-03T17:54:33.000-07:00
diff --git a/bigframes/exceptions.py b/bigframes/exceptions.py
@@ -25,5 +25,5 @@ class CleanupFailedWarning(Warning):
     """Bigframes failed to clean up a table resource."""
 
 
-class NoDefaultIndexError(ValueError):
-    """Unable to create a default index."""
+class DefaultIndexWarning(Warning):
+    """Default index may cause unexpected costs."""
diff --git a/bigframes/session/_io/bigquery/read_gbq_table.py b/bigframes/session/_io/bigquery/read_gbq_table.py
@@ -277,13 +277,14 @@ def get_index_cols_and_uniqueness(
         # resource utilization because of the default sequential index. See
         # internal issue 335727141.
         if _is_table_clustered_or_partitioned(table) and not primary_keys:
-            raise bigframes.exceptions.NoDefaultIndexError(
+            warnings.warn(
                 f"Table '{str(table.reference)}' is clustered and/or "
                 "partitioned, but BigQuery DataFrames was not able to find a "
-                "suitable index. To avoid this error, set at least one of: "
+                "suitable index. To avoid this warning, set at least one of: "
                 # TODO(b/338037499): Allow max_results to override this too,
                 # once we make it more efficient.
-                "`index_col` or `filters`."
+                "`index_col` or `filters`.",
+                category=bigframes.exceptions.DefaultIndexWarning,
             )
 
         # If there are primary keys defined, the query engine assumes these
diff --git a/tests/unit/session/test_session.py b/tests/unit/session/test_session.py
@@ -17,6 +17,7 @@
 import os
 import re
 from unittest import mock
+import warnings
 
 import google.api_core.exceptions
 import google.cloud.bigquery
@@ -186,7 +187,7 @@ def get_table_mock(table_ref):
 
 
 @pytest.mark.parametrize("table", CLUSTERED_OR_PARTITIONED_TABLES)
-def test_no_default_index_error_raised_by_read_gbq(table):
+def test_default_index_warning_raised_by_read_gbq(table):
     """Because of the windowing operation to create a default index, row
     filters can't push down to the clustering column.
 
@@ -202,12 +203,12 @@ def test_no_default_index_error_raised_by_read_gbq(table):
     session = resources.create_bigquery_session(bqclient=bqclient)
     table._properties["location"] = session._location
 
-    with pytest.raises(bigframes.exceptions.NoDefaultIndexError):
+    with pytest.warns(bigframes.exceptions.DefaultIndexWarning):
         session.read_gbq("my-project.my_dataset.my_table")
 
 
 @pytest.mark.parametrize("table", CLUSTERED_OR_PARTITIONED_TABLES)
-def test_no_default_index_error_not_raised_by_read_gbq_index_col_sequential_int64(
+def test_default_index_warning_not_raised_by_read_gbq_index_col_sequential_int64(
     table,
 ):
     """Because of the windowing operation to create a default index, row
@@ -224,11 +225,13 @@ def test_no_default_index_error_not_raised_by_read_gbq_index_col_sequential_int6
     session = resources.create_bigquery_session(bqclient=bqclient)
     table._properties["location"] = session._location
 
-    # No exception raised because we set the option allowing the default indexes.
-    df = session.read_gbq(
-        "my-project.my_dataset.my_table",
-        index_col=bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64,
-    )
+    # No warnings raised because we set the option allowing the default indexes.
+    with warnings.catch_warnings():
+        warnings.simplefilter("error", bigframes.exceptions.DefaultIndexWarning)
+        df = session.read_gbq(
+            "my-project.my_dataset.my_table",
+            index_col=bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64,
+        )
 
     # We expect a window operation because we specificaly requested a sequential index.
     generated_sql = df.sql.casefold()
@@ -246,7 +249,7 @@ def test_no_default_index_error_not_raised_by_read_gbq_index_col_sequential_int6
     ),
 )
 @pytest.mark.parametrize("table", CLUSTERED_OR_PARTITIONED_TABLES)
-def test_no_default_index_error_not_raised_by_read_gbq_index_col_columns(
+def test_default_index_warning_not_raised_by_read_gbq_index_col_columns(
     total_count,
     distinct_count,
     table,
@@ -270,18 +273,20 @@ def test_no_default_index_error_not_raised_by_read_gbq_index_col_columns(
     )
     table._properties["location"] = session._location
 
-    # No exception raised because there are columns to use as the index.
-    df = session.read_gbq(
-        "my-project.my_dataset.my_table", index_col=("idx_1", "idx_2")
-    )
+    # No warning raised because there are columns to use as the index.
+    with warnings.catch_warnings():
+        warnings.simplefilter("error", bigframes.exceptions.DefaultIndexWarning)
+        df = session.read_gbq(
+            "my-project.my_dataset.my_table", index_col=("idx_1", "idx_2")
+        )
 
     # There should be no analytic operators to prevent row filtering pushdown.
     assert "OVER" not in df.sql
     assert tuple(df.index.names) == ("idx_1", "idx_2")
 
 
 @pytest.mark.parametrize("table", CLUSTERED_OR_PARTITIONED_TABLES)
-def test_no_default_index_error_not_raised_by_read_gbq_primary_key(table):
+def test_default_index_warning_not_raised_by_read_gbq_primary_key(table):
     """If a primary key is set on the table, we use that as the index column
     by default, no error should be raised in this case.
 
@@ -310,8 +315,10 @@ def test_no_default_index_error_not_raised_by_read_gbq_primary_key(table):
     )
     table._properties["location"] = session._location
 
-    # No exception raised because there is a primary key to use as the index.
-    df = session.read_gbq("my-project.my_dataset.my_table")
+    # No warning raised because there is a primary key to use as the index.
+    with warnings.catch_warnings():
+        warnings.simplefilter("error", bigframes.exceptions.DefaultIndexWarning)
+        df = session.read_gbq("my-project.my_dataset.my_table")
 
     # There should be no analytic operators to prevent row filtering pushdown.
     assert "OVER" not in df.sql
diff --git a/third_party/bigframes_vendored/pandas/io/gbq.py b/third_party/bigframes_vendored/pandas/io/gbq.py
@@ -157,7 +157,7 @@ def read_gbq(
                 Alias for columns, retained for backwards compatibility.
 
         Raises:
-            bigframes.exceptions.NoDefaultIndexError:
+            bigframes.exceptions.DefaultIndexWarning:
                 Using the default index is discouraged, such as with clustered
                 or partitioned tables without primary keys.