Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit fe72ada

Browse files
authored Mar 6, 2025
fix: Fix list-like indexers in partial ordering mode (#1456)
1 parent 0241139 commit fe72ada

File tree

5 files changed

+58
-8
lines changed

5 files changed

+58
-8
lines changed
 

‎bigframes/core/blocks.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -2325,13 +2325,15 @@ def _apply_binop(
23252325

23262326
return self.project_exprs(exprs, labels=labels, drop=True)
23272327

2328+
# TODO: Re-implement join in terms of merge (requires also adding remaining merge args)
23282329
def join(
23292330
self,
23302331
other: Block,
23312332
*,
23322333
how="left",
23332334
sort: bool = False,
23342335
block_identity_join: bool = False,
2336+
always_order: bool = False,
23352337
) -> Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]:
23362338
"""
23372339
Join two blocks objects together, and provide mappings between source columns and output columns.
@@ -2345,6 +2347,8 @@ def join(
23452347
if true will sort result by index
23462348
block_identity_join (bool):
23472349
If true, will not convert join to a projection (implicitly assuming unique indices)
2350+
always_order (bool):
2351+
If true, will always preserve input ordering, even if ordering mode is partial
23482352
23492353
Returns:
23502354
Block, (left_mapping, right_mapping): Result block and mappers from input column ids to result column ids.
@@ -2390,10 +2394,14 @@ def join(
23902394
self._throw_if_null_index("join")
23912395
other._throw_if_null_index("join")
23922396
if self.index.nlevels == other.index.nlevels == 1:
2393-
return join_mono_indexed(self, other, how=how, sort=sort)
2397+
return join_mono_indexed(
2398+
self, other, how=how, sort=sort, propogate_order=always_order
2399+
)
23942400
else: # Handles cases where one or both sides are multi-indexed
23952401
# Always sort multi-index join
2396-
return join_multi_indexed(self, other, how=how, sort=sort)
2402+
return join_multi_indexed(
2403+
self, other, how=how, sort=sort, propogate_order=always_order
2404+
)
23972405

23982406
def is_monotonic_increasing(
23992407
self, column_id: typing.Union[str, Sequence[str]]
@@ -2850,7 +2858,8 @@ def join_mono_indexed(
28502858
right: Block,
28512859
*,
28522860
how="left",
2853-
sort=False,
2861+
sort: bool = False,
2862+
propogate_order: bool = False,
28542863
) -> Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]:
28552864
left_expr = left.expr
28562865
right_expr = right.expr
@@ -2861,6 +2870,7 @@ def join_mono_indexed(
28612870
conditions=(
28622871
join_defs.JoinCondition(left.index_columns[0], right.index_columns[0]),
28632872
),
2873+
propogate_order=propogate_order,
28642874
)
28652875

28662876
left_index = get_column_left[left.index_columns[0]]
@@ -2895,7 +2905,8 @@ def join_multi_indexed(
28952905
right: Block,
28962906
*,
28972907
how="left",
2898-
sort=False,
2908+
sort: bool = False,
2909+
propogate_order: bool = False,
28992910
) -> Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]:
29002911
if not (left.index.is_uniquely_named() and right.index.is_uniquely_named()):
29012912
raise ValueError("Joins not supported on indices with non-unique level names")
@@ -2924,6 +2935,7 @@ def join_multi_indexed(
29242935
join_defs.JoinCondition(left, right)
29252936
for left, right in zip(left_join_ids, right_join_ids)
29262937
),
2938+
propogate_order=propogate_order,
29272939
)
29282940

29292941
left_ids_post_join = [get_column_left[id] for id in left_join_ids]

‎bigframes/core/indexers.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -379,12 +379,14 @@ def _perform_loc_list_join(
379379
result = typing.cast(
380380
bigframes.series.Series,
381381
series_or_dataframe.to_frame()._perform_join_by_index(
382-
keys_index, how="right"
382+
keys_index, how="right", always_order=True
383383
)[name],
384384
)
385385
result = result.rename(original_name)
386386
else:
387-
result = series_or_dataframe._perform_join_by_index(keys_index, how="right")
387+
result = series_or_dataframe._perform_join_by_index(
388+
keys_index, how="right", always_order=True
389+
)
388390

389391
if drop_levels and series_or_dataframe.index.nlevels > keys_index.nlevels:
390392
# drop common levels
@@ -492,6 +494,12 @@ def _iloc_getitem_series_or_dataframe(
492494

493495
# set to offset index and use regular loc, then restore index
494496
df = df.reset_index(drop=False)
497+
block = df._block
498+
# explicitly set index to offsets, reset_index may not generate offsets in some modes
499+
block, offsets_id = block.promote_offsets("temp_iloc_offsets_")
500+
block = block.set_index([offsets_id])
501+
df = bigframes.dataframe.DataFrame(block)
502+
495503
result = df.loc[key]
496504
result = result.set_index(temporary_index_names)
497505
result = result.rename_axis(original_index_names)

‎bigframes/dataframe.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -3238,9 +3238,15 @@ def join(
32383238
return left._perform_join_by_index(right, how=how)
32393239

32403240
def _perform_join_by_index(
3241-
self, other: Union[DataFrame, indexes.Index], *, how: str = "left"
3241+
self,
3242+
other: Union[DataFrame, indexes.Index],
3243+
*,
3244+
how: str = "left",
3245+
always_order: bool = False,
32423246
):
3243-
block, _ = self._block.join(other._block, how=how, block_identity_join=True)
3247+
block, _ = self._block.join(
3248+
other._block, how=how, block_identity_join=True, always_order=always_order
3249+
)
32443250
return DataFrame(block)
32453251

32463252
@validations.requires_ordering()

‎tests/system/conftest.py

+10
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,16 @@ def scalars_df_index(
544544
return session.read_gbq(scalars_table_id, index_col="rowindex")
545545

546546

547+
@pytest.fixture(scope="session")
548+
def scalars_df_partial_ordering(
549+
scalars_table_id: str, unordered_session: bigframes.Session
550+
) -> bigframes.dataframe.DataFrame:
551+
"""DataFrame pointing at test data."""
552+
return unordered_session.read_gbq(
553+
scalars_table_id, index_col="rowindex"
554+
).sort_index()
555+
556+
547557
@pytest.fixture(scope="session")
548558
def scalars_df_null_index(
549559
scalars_table_id: str, session: bigframes.Session

‎tests/system/small/test_dataframe.py

+14
Original file line numberDiff line numberDiff line change
@@ -4418,6 +4418,20 @@ def test_iloc_list(scalars_df_index, scalars_pandas_df_index):
44184418
)
44194419

44204420

4421+
def test_iloc_list_partial_ordering(
4422+
scalars_df_partial_ordering, scalars_pandas_df_index
4423+
):
4424+
index_list = [0, 0, 0, 5, 4, 7]
4425+
4426+
bf_result = scalars_df_partial_ordering.iloc[index_list]
4427+
pd_result = scalars_pandas_df_index.iloc[index_list]
4428+
4429+
pd.testing.assert_frame_equal(
4430+
bf_result.to_pandas(),
4431+
pd_result,
4432+
)
4433+
4434+
44214435
def test_iloc_list_multiindex(scalars_dfs):
44224436
scalars_df, scalars_pandas_df = scalars_dfs
44234437
scalars_df = scalars_df.copy()

0 commit comments

Comments
 (0)
Failed to load comments.