Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 6ee48d5

Browse files
authored Feb 19, 2025
perf: Avoid redundant SQL casts (#1399)
1 parent 0d6ae87 commit 6ee48d5

File tree

3 files changed

+21
-44
lines changed

3 files changed

+21
-44
lines changed
 

‎bigframes/core/compile/compiler.py

+1 -3
Original file line number | Diff line number | Diff line change
@@ -240,9 +240,7 @@ def compile_read_table_unordered(
240240
return compiled.UnorderedIR(
241241
ibis_table,
242242
tuple(
243-
bigframes.core.compile.ibis_types.ibis_value_to_canonical_type(
244-
ibis_table[scan_item.source_id].name(scan_item.id.sql)
245-
)
243+
ibis_table[scan_item.source_id].name(scan_item.id.sql)
246244
for scan_item in scan.items
247245
),
248246
)

‎bigframes/core/compile/ibis_types.py

+10 -21
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,9 @@ def cast_ibis_value(
113113
114114
Raises:
115115
TypeError: if the type cast cannot be executed"""
116-
if value.type() == to_type:
116+
# normalize to nullable, which doesn't impact compatibility
117+
value_type = value.type().copy(nullable=True)
118+
if value_type == to_type:
117119
return value
118120
# casts that just work
119121
# TODO(bmil): add to this as more casts are verified
@@ -189,52 +191,39 @@ def cast_ibis_value(
189191
ibis_dtypes.multipolygon: (IBIS_GEO_TYPE,),
190192
}
191193

192-
value = ibis_value_to_canonical_type(value)
193-
if value.type() in good_casts:
194-
if to_type in good_casts[value.type()]:
194+
if value_type in good_casts:
195+
if to_type in good_casts[value_type]:
195196
return value.try_cast(to_type) if safe else value.cast(to_type)
196197
else:
197198
# this should never happen
198199
raise TypeError(
199-
f"Unexpected value type {value.type()}. {constants.FEEDBACK_LINK}"
200+
f"Unexpected value type {value_type}. {constants.FEEDBACK_LINK}"
200201
)
201202

202203
# casts that need some encouragement
203204

204205
# BigQuery casts bools to lower case strings. Capitalize the result to match Pandas
205206
# TODO(bmil): remove this workaround after fixing Ibis
206-
if value.type() == ibis_dtypes.bool and to_type == ibis_dtypes.string:
207+
if value_type == ibis_dtypes.bool and to_type == ibis_dtypes.string:
207208
if safe:
208209
return cast(ibis_types.StringValue, value.try_cast(to_type)).capitalize()
209210
else:
210211
return cast(ibis_types.StringValue, value.cast(to_type)).capitalize()
211212

212-
if value.type() == ibis_dtypes.bool and to_type == ibis_dtypes.float64:
213+
if value_type == ibis_dtypes.bool and to_type == ibis_dtypes.float64:
213214
if safe:
214215
return value.try_cast(ibis_dtypes.int64).try_cast(ibis_dtypes.float64)
215216
else:
216217
return value.cast(ibis_dtypes.int64).cast(ibis_dtypes.float64)
217218

218-
if value.type() == ibis_dtypes.float64 and to_type == ibis_dtypes.bool:
219+
if value_type == ibis_dtypes.float64 and to_type == ibis_dtypes.bool:
219220
return value != ibis_types.literal(0)
220221

221222
raise TypeError(
222-
f"Unsupported cast {value.type()} to {to_type}. {constants.FEEDBACK_LINK}"
223+
f"Unsupported cast {value_type} to {to_type}. {constants.FEEDBACK_LINK}"
223224
)
224225

225226

226-
def ibis_value_to_canonical_type(value: ibis_types.Value) -> ibis_types.Value:
227-
"""Converts an Ibis expression to canonical type.
228-
229-
This is useful in cases where multiple types correspond to the same BigFrames dtype.
230-
"""
231-
ibis_type = value.type()
232-
name = value.get_name()
233-
# Allow REQUIRED fields to be joined with NULLABLE fields.
234-
nullable_type = ibis_type.copy(nullable=True)
235-
return value.cast(nullable_type).name(name)
236-
237-
238227
def bigframes_dtype_to_ibis_dtype(
239228
bigframes_dtype: bigframes.dtypes.Dtype,
240229
) -> ibis_dtypes.DataType:

‎third_party/bigframes_vendored/ibis/expr/operations/relations.py

+10 -20
Original file line number | Diff line number | Diff line change
@@ -10,11 +10,7 @@
1010
from typing import Annotated, Any, Literal, Optional, TypeVar
1111

1212
from bigframes_vendored.ibis.common.annotations import attribute
13-
from bigframes_vendored.ibis.common.collections import (
14-
ConflictingValuesError,
15-
FrozenDict,
16-
FrozenOrderedDict,
17-
)
13+
from bigframes_vendored.ibis.common.collections import FrozenDict, FrozenOrderedDict
1814
from bigframes_vendored.ibis.common.exceptions import (
1915
IbisTypeError,
2016
IntegrityError,
@@ -342,20 +338,6 @@ class Set(Relation):
342338
values = FrozenOrderedDict()
343339

344340
def __init__(self, left, right, **kwargs):
345-
err_msg = "Table schemas must be equal for set operations."
346-
try:
347-
missing_from_left = right.schema - left.schema
348-
missing_from_right = left.schema - right.schema
349-
except ConflictingValuesError as e:
350-
raise RelationError(err_msg + "\n" + str(e)) from e
351-
if missing_from_left or missing_from_right:
352-
msgs = [err_msg]
353-
if missing_from_left:
354-
msgs.append(f"Columns missing from the left:\n{missing_from_left}.")
355-
if missing_from_right:
356-
msgs.append(f"Columns missing from the right:\n{missing_from_right}.")
357-
raise RelationError("\n".join(msgs))
358-
359341
if left.schema.names != right.schema.names:
360342
# rewrite so that both sides have the columns in the same order making it
361343
# easier for the backends to implement set operations
@@ -365,7 +347,15 @@ def __init__(self, left, right, **kwargs):
365347

366348
@attribute
367349
def schema(self):
368-
return self.left.schema
350+
dtypes = (
351+
dt.higher_precedence(ltype, rtype)
352+
for ltype, rtype in zip(
353+
self.left.schema.values(), self.right.schema.values()
354+
)
355+
)
356+
return Schema.from_tuples(
357+
(name, coltype) for name, coltype in zip(self.left.schema.names, dtypes)
358+
)
369359

370360

371361
@public

0 commit comments

Comments
 (0)
Failed to load comments.