Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 8324f13

Browse files
authored Nov 16, 2023
fix: correctly handle null values when initializing fingerprint ordering (#210)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:

- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
1 parent f957b27 commit 8324f13

File tree

2 files changed

+14
-3
lines changed

2 files changed

+14
-3
lines changed
 

‎bigframes/session/__init__.py

+6-3
Original file line number | Diff line number | Diff line change
@@ -1120,8 +1120,9 @@ def _create_total_ordering(
11201120
ordering_hash_part = guid.generate_guid("bigframes_ordering_")
11211121
ordering_rand_part = guid.generate_guid("bigframes_ordering_")
11221122

1123+
# All inputs into hash must be non-null or resulting hash will be null
11231124
str_values = list(
1124-
map(lambda col: _convert_to_string(table[col]), table.columns)
1125+
map(lambda col: _convert_to_nonnull_string(table[col]), table.columns)
11251126
)
11261127
full_row_str = (
11271128
str_values[0].concat(*str_values[1:])
@@ -1419,7 +1420,7 @@ def _can_cluster_bq(field: bigquery.SchemaField):
14191420
)
14201421

14211422

1422-
def _convert_to_string(column: ibis_types.Column) -> ibis_types.StringColumn:
1423+
def _convert_to_nonnull_string(column: ibis_types.Column) -> ibis_types.StringValue:
14231424
col_type = column.type()
14241425
if (
14251426
col_type.is_numeric()
@@ -1436,4 +1437,6 @@ def _convert_to_string(column: ibis_types.Column) -> ibis_types.StringColumn:
14361437
# TO_JSON_STRING works with all data types, but isn't the most efficient
14371438
# Needed for JSON, STRUCT and ARRAY datatypes
14381439
result = vendored_ibis_ops.ToJsonString(column).to_expr() # type: ignore
1439-
return typing.cast(ibis_types.StringColumn, result)
1440+
# Escape backslashes and use backslash as delimiter
1441+
escaped = typing.cast(ibis_types.StringColumn, result.fillna("")).replace("\\", "\\\\") # type: ignore
1442+
return typing.cast(ibis_types.StringColumn, ibis.literal("\\")).concat(escaped)

‎tests/system/small/test_dataframe.py

+8
Original file line number | Diff line number | Diff line change
@@ -2703,6 +2703,14 @@ def test_sample(scalars_dfs, frac, n, random_state):
27032703
assert bf_result.shape[1] == scalars_df.shape[1]
27042704

27052705

2706+
def test_sample_determinism(penguins_df_default_index):
2707+
df = penguins_df_default_index.sample(n=100, random_state=12345).head(15)
2708+
bf_result = df.to_pandas()
2709+
bf_result2 = df.to_pandas()
2710+
2711+
pandas.testing.assert_frame_equal(bf_result, bf_result2)
2712+
2713+
27062714
def test_sample_raises_value_error(scalars_dfs):
27072715
scalars_df, _ = scalars_dfs
27082716
with pytest.raises(

0 commit comments

Comments
 (0)
Failed to load comments.