Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 7ae565d

Browse files
authored Feb 5, 2025
perf: Simplify merge join key coalescing (#1361)
1 parent e7493c8 commit 7ae565d

File tree

1 file changed

+14
-12
lines changed

1 file changed

+14
-12
lines changed
 

‎bigframes/core/blocks.py

+14 -12
Original file line number | Diff line number | Diff line change
@@ -2077,14 +2077,12 @@ def merge(
20772077
result_columns = []
20782078
matching_join_labels = []
20792079

2080-
coalesced_ids = []
2081-
for left_id, right_id in zip(left_join_ids, right_join_ids):
2082-
joined_expr, coalesced_id = joined_expr.project_to_id(
2083-
ops.coalesce_op.as_expr(
2084-
get_column_left[left_id], get_column_right[right_id]
2085-
),
2086-
)
2087-
coalesced_ids.append(coalesced_id)
2080+
left_post_join_ids = tuple(get_column_left[id] for id in left_join_ids)
2081+
right_post_join_ids = tuple(get_column_right[id] for id in right_join_ids)
2082+
2083+
joined_expr, coalesced_ids = coalesce_columns(
2084+
joined_expr, left_post_join_ids, right_post_join_ids, how=how, drop=False
2085+
)
20882086

20892087
for col_id in self.value_columns:
20902088
if col_id in left_join_ids:
@@ -2102,7 +2100,6 @@ def merge(
21022100
result_columns.append(get_column_left[col_id])
21032101
for col_id in other.value_columns:
21042102
if col_id in right_join_ids:
2105-
key_part = right_join_ids.index(col_id)
21062103
if other.col_id_to_label[matching_right_id] in matching_join_labels:
21072104
pass
21082105
else:
@@ -2928,26 +2925,31 @@ def resolve_label_id(label: Label) -> str:
29282925
)
29292926

29302927

2928+
# TODO: Rewrite just to return expressions
29312929
def coalesce_columns(
29322930
expr: core.ArrayValue,
29332931
left_ids: typing.Sequence[str],
29342932
right_ids: typing.Sequence[str],
29352933
how: str,
2934+
drop: bool = True,
29362935
) -> Tuple[core.ArrayValue, Sequence[str]]:
29372936
result_ids = []
29382937
for left_id, right_id in zip(left_ids, right_ids):
29392938
if how == "left" or how == "inner" or how == "cross":
29402939
result_ids.append(left_id)
2941-
expr = expr.drop_columns([right_id])
2940+
if drop:
2941+
expr = expr.drop_columns([right_id])
29422942
elif how == "right":
29432943
result_ids.append(right_id)
2944-
expr = expr.drop_columns([left_id])
2944+
if drop:
2945+
expr = expr.drop_columns([left_id])
29452946
elif how == "outer":
29462947
coalesced_id = guid.generate_guid()
29472948
expr, coalesced_id = expr.project_to_id(
29482949
ops.coalesce_op.as_expr(left_id, right_id)
29492950
)
2950-
expr = expr.drop_columns([left_id, right_id])
2951+
if drop:
2952+
expr = expr.drop_columns([left_id, right_id])
29512953
result_ids.append(coalesced_id)
29522954
else:
29532955
raise ValueError(f"Unexpected join type: {how}. {constants.FEEDBACK_LINK}")

0 commit comments

Comments
 (0)
Failed to load comments.