Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 4b60049

Browse files
authoredFeb 21, 2025
feat: support routines with ARRAY return type in read_gbq_function (#1412)
* feat: support routines with ARRAY return type in `read_gbq_function` * remove commented out code, fix test to work for all python versions * support array output in binary and nary applications of read_gbq_function
1 parent fa4e3ad commit 4b60049

File tree

11 files changed

+197
-64
lines changed

11 files changed

+197
-64
lines changed
 

‎bigframes/core/compile/ibis_types.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -463,10 +463,19 @@ def ibis_array_output_type_from_python_type(t: type) -> ibis_dtypes.DataType:
463463
return python_type_to_ibis_type(t)
464464

465465

466-
def ibis_type_from_type_kind(tk: bigquery.StandardSqlTypeNames) -> ibis_dtypes.DataType:
466+
def ibis_type_from_bigquery_type(
467+
type_: bigquery.StandardSqlDataType,
468+
) -> ibis_dtypes.DataType:
467469
"""Convert bq type to ibis. Only to be used for remote functions, does not handle all types."""
468-
if tk not in bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS:
470+
if type_.type_kind not in bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS:
469471
raise UnsupportedTypeError(
470-
tk, bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS
472+
type_.type_kind, bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS
473+
)
474+
elif type_.type_kind == "ARRAY":
475+
return ibis_dtypes.Array(
476+
value_type=ibis_type_from_bigquery_type(
477+
typing.cast(bigquery.StandardSqlDataType, type_.array_element_type)
478+
)
471479
)
472-
return third_party_ibis_bqtypes.BigQueryType.to_ibis(tk)
480+
else:
481+
return third_party_ibis_bqtypes.BigQueryType.to_ibis(type_.type_kind)

‎bigframes/dataframe.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -4088,9 +4088,12 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs):
40884088
)
40894089
result_series.name = None
40904090

4091-
# if the output is an array, reconstruct it from the json serialized
4092-
# string form
4093-
if bigframes.dtypes.is_array_like(func.output_dtype):
4091+
# If the result type is string but the function output is intended
4092+
# to be an array, reconstruct the array from the string assuming it
4093+
# is a json serialized form of the array.
4094+
if bigframes.dtypes.is_string_like(
4095+
result_series.dtype
4096+
) and bigframes.dtypes.is_array_like(func.output_dtype):
40944097
import bigframes.bigquery as bbq
40954098

40964099
result_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(

‎bigframes/dtypes.py

+1
Original file line numberDiff line numberDiff line change
@@ -874,4 +874,5 @@ def lcd_type_or_throw(dtype1: Dtype, dtype2: Dtype) -> Dtype:
874874
"INT64",
875875
"INTEGER",
876876
"STRING",
877+
"ARRAY",
877878
}

‎bigframes/functions/_function_session.py

+6
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,7 @@ def try_delattr(attr):
501501
try_delattr("bigframes_remote_function")
502502
try_delattr("input_dtypes")
503503
try_delattr("output_dtype")
504+
try_delattr("bigframes_bigquery_function_output_dtype")
504505
try_delattr("is_row_processor")
505506
try_delattr("ibis_node")
506507

@@ -589,6 +590,11 @@ def try_delattr(attr):
589590
ibis_signature.output_type
590591
)
591592
)
593+
func.bigframes_bigquery_function_output_dtype = (
594+
bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype(
595+
ibis_output_type_for_bqrf
596+
)
597+
)
592598
func.is_row_processor = is_row_processor
593599
func.ibis_node = node
594600

‎bigframes/functions/function.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,10 @@ class ReturnTypeMissingError(ValueError):
5656
# TODO: Move this to compile folder
5757
def ibis_signature_from_routine(routine: bigquery.Routine) -> _utils.IbisSignature:
5858
if routine.return_type:
59-
ibis_output_type = bigframes.core.compile.ibis_types.ibis_type_from_type_kind(
60-
routine.return_type.type_kind
59+
ibis_output_type = (
60+
bigframes.core.compile.ibis_types.ibis_type_from_bigquery_type(
61+
routine.return_type
62+
)
6163
)
6264
else:
6365
raise ReturnTypeMissingError
@@ -82,8 +84,8 @@ def ibis_signature_from_routine(routine: bigquery.Routine) -> _utils.IbisSignatu
8284
return _utils.IbisSignature(
8385
parameter_names=[arg.name for arg in routine.arguments],
8486
input_types=[
85-
bigframes.core.compile.ibis_types.ibis_type_from_type_kind(
86-
arg.data_type.type_kind
87+
bigframes.core.compile.ibis_types.ibis_type_from_bigquery_type(
88+
arg.data_type
8789
)
8890
if arg.data_type
8991
else None
@@ -233,6 +235,8 @@ def func(*bigframes_args, **bigframes_kwargs):
233235
else ibis_signature.output_type
234236
)
235237

238+
func.bigframes_bigquery_function_output_dtype = bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype(ibis_signature.output_type) # type: ignore
239+
236240
func.is_row_processor = is_row_processor # type: ignore
237241
func.ibis_node = node # type: ignore
238242
return func

‎bigframes/operations/remote_function_ops.py

+9-31
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
import dataclasses
1616
import typing
1717

18-
from bigframes import dtypes
1918
from bigframes.operations import base_ops
2019

2120

@@ -31,17 +30,10 @@ def expensive(self) -> bool:
3130

3231
def output_type(self, *input_types):
3332
# This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method
34-
if hasattr(self.func, "output_dtype"):
35-
if dtypes.is_array_like(self.func.output_dtype):
36-
# TODO(b/284515241): remove this special handling to support
37-
# array output types once BQ remote functions support ARRAY.
38-
# Until then, use json serialized strings at the remote function
39-
# level, and parse that to the intended output type at the
40-
# bigframes level.
41-
return dtypes.STRING_DTYPE
42-
return self.func.output_dtype
33+
if hasattr(self.func, "bigframes_bigquery_function_output_dtype"):
34+
return self.func.bigframes_bigquery_function_output_dtype
4335
else:
44-
raise AttributeError("output_dtype not defined")
36+
raise AttributeError("bigframes_bigquery_function_output_dtype not defined")
4537

4638

4739
@dataclasses.dataclass(frozen=True)
@@ -55,17 +47,10 @@ def expensive(self) -> bool:
5547

5648
def output_type(self, *input_types):
5749
# This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method
58-
if hasattr(self.func, "output_dtype"):
59-
if dtypes.is_array_like(self.func.output_dtype):
60-
# TODO(b/284515241): remove this special handling to support
61-
# array output types once BQ remote functions support ARRAY.
62-
# Until then, use json serialized strings at the remote function
63-
# level, and parse that to the intended output type at the
64-
# bigframes level.
65-
return dtypes.STRING_DTYPE
66-
return self.func.output_dtype
50+
if hasattr(self.func, "bigframes_bigquery_function_output_dtype"):
51+
return self.func.bigframes_bigquery_function_output_dtype
6752
else:
68-
raise AttributeError("output_dtype not defined")
53+
raise AttributeError("bigframes_bigquery_function_output_dtype not defined")
6954

7055

7156
@dataclasses.dataclass(frozen=True)
@@ -79,14 +64,7 @@ def expensive(self) -> bool:
7964

8065
def output_type(self, *input_types):
8166
# This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method
82-
if hasattr(self.func, "output_dtype"):
83-
if dtypes.is_array_like(self.func.output_dtype):
84-
# TODO(b/284515241): remove this special handling to support
85-
# array output types once BQ remote functions support ARRAY.
86-
# Until then, use json serialized strings at the remote function
87-
# level, and parse that to the intended output type at the
88-
# bigframes level.
89-
return dtypes.STRING_DTYPE
90-
return self.func.output_dtype
67+
if hasattr(self.func, "bigframes_bigquery_function_output_dtype"):
68+
return self.func.bigframes_bigquery_function_output_dtype
9169
else:
92-
raise AttributeError("output_dtype not defined")
70+
raise AttributeError("bigframes_bigquery_function_output_dtype not defined")

‎bigframes/series.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -1545,9 +1545,12 @@ def apply(
15451545
ops.RemoteFunctionOp(func=func, apply_on_null=True)
15461546
)
15471547

1548-
# if the output is an array, reconstruct it from the json serialized
1549-
# string form
1550-
if bigframes.dtypes.is_array_like(func.output_dtype):
1548+
# If the result type is string but the function output is intended to
1549+
# be an array, reconstruct the array from the string assuming it is a
1550+
# json serialized form of the array.
1551+
if bigframes.dtypes.is_string_like(
1552+
result_series.dtype
1553+
) and bigframes.dtypes.is_array_like(func.output_dtype):
15511554
import bigframes.bigquery as bbq
15521555

15531556
result_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(
@@ -1585,9 +1588,12 @@ def combine(
15851588
other, ops.BinaryRemoteFunctionOp(func=func)
15861589
)
15871590

1588-
# if the output is an array, reconstruct it from the json serialized
1589-
# string form
1590-
if bigframes.dtypes.is_array_like(func.output_dtype):
1591+
# If the result type is string but the function output is intended to
1592+
# be an array, reconstruct the array from the string assuming it is a
1593+
# json serialized form of the array.
1594+
if bigframes.dtypes.is_string_like(
1595+
result_series.dtype
1596+
) and bigframes.dtypes.is_array_like(func.output_dtype):
15911597
import bigframes.bigquery as bbq
15921598

15931599
result_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(

‎tests/system/conftest.py

+5
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,11 @@ def table_id_unique(dataset_id: str):
251251
return f"{dataset_id}.{prefixer.create_prefix()}"
252252

253253

254+
@pytest.fixture(scope="function")
255+
def routine_id_unique(dataset_id: str):
256+
return f"{dataset_id}.{prefixer.create_prefix()}"
257+
258+
254259
@pytest.fixture(scope="session")
255260
def scalars_schema(bigquery_client: bigquery.Client):
256261
# TODO(swast): Add missing scalar data types such as BIGNUMERIC.

‎tests/system/large/functions/test_remote_function.py

+4
Original file line numberDiff line numberDiff line change
@@ -2193,6 +2193,10 @@ def foo(x, y, z):
21932193
)
21942194
)
21952195
)
2196+
assert (
2197+
getattr(foo, "bigframes_bigquery_function_output_dtype")
2198+
== bigframes.dtypes.STRING_DTYPE
2199+
)
21962200

21972201
# Fails to apply on dataframe with incompatible number of columns
21982202
with pytest.raises(
There was a problem loading the remainder of the diff.

0 commit comments

Comments
 (0)
Failed to load comments.