Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: calling to_timdelta() over timedeltas no longer changes their values #1411

Merged
merged 7 commits into from
Feb 20, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
@@ -1186,6 +1186,11 @@ def to_timedelta_op_impl(x: ibis_types.Value, op: ops.ToTimedeltaOp):
).floor()


@scalar_op_compiler.register_unary_op(ops.timedelta_floor_op)
def timedelta_floor_op_impl(x: ibis_types.NumericValue):
return x.floor()


@scalar_op_compiler.register_unary_op(ops.RemoteFunctionOp, pass_op=True)
def remote_function_op_impl(x: ibis_types.Value, op: ops.RemoteFunctionOp):
ibis_node = getattr(op.func, "ibis_node", None)
19 changes: 15 additions & 4 deletions bigframes/core/rewrite/timedeltas.py
Original file line number Diff line number Diff line change
@@ -125,6 +125,9 @@ def _rewrite_op_expr(
# but for timedeltas: int(timedelta) // float => int(timedelta)
return _rewrite_floordiv_op(inputs[0], inputs[1])

if isinstance(expr.op, ops.ToTimedeltaOp):
return _rewrite_to_timedelta_op(expr.op, inputs[0])

return _TypedExpr.create_op_expr(expr.op, *inputs)


@@ -154,9 +157,9 @@ def _rewrite_mul_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
result = _TypedExpr.create_op_expr(ops.mul_op, left, right)

if left.dtype is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype):
return _TypedExpr.create_op_expr(ops.ToTimedeltaOp("us"), result)
return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result)
if dtypes.is_numeric(left.dtype) and right.dtype is dtypes.TIMEDELTA_DTYPE:
return _TypedExpr.create_op_expr(ops.ToTimedeltaOp("us"), result)
return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result)

return result

@@ -165,7 +168,7 @@ def _rewrite_div_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
result = _TypedExpr.create_op_expr(ops.div_op, left, right)

if left.dtype is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype):
return _TypedExpr.create_op_expr(ops.ToTimedeltaOp("us"), result)
return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result)

return result

@@ -174,11 +177,19 @@ def _rewrite_floordiv_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
result = _TypedExpr.create_op_expr(ops.floordiv_op, left, right)

if left.dtype is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype):
return _TypedExpr.create_op_expr(ops.ToTimedeltaOp("us"), result)
return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result)

return result


def _rewrite_to_timedelta_op(op: ops.ToTimedeltaOp, arg: _TypedExpr):
if arg.dtype is dtypes.TIMEDELTA_DTYPE:
# Do nothing for values that are already timedeltas
return arg

return _TypedExpr.create_op_expr(op, arg)


@functools.cache
def _rewrite_aggregation(
aggregation: ex.Aggregation, schema: schema.ArraySchema
2 changes: 2 additions & 0 deletions bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
@@ -184,6 +184,7 @@
from bigframes.operations.struct_ops import StructFieldOp, StructOp
from bigframes.operations.time_ops import hour_op, minute_op, normalize_op, second_op
from bigframes.operations.timedelta_ops import (
timedelta_floor_op,
timestamp_add_op,
timestamp_sub_op,
ToTimedeltaOp,
@@ -259,6 +260,7 @@
"second_op",
"normalize_op",
# Timedelta ops
"timedelta_floor_op",
"timestamp_add_op",
"timestamp_sub_op",
"ToTimedeltaOp",
27 changes: 23 additions & 4 deletions bigframes/operations/timedelta_ops.py
Original file line number Diff line number Diff line change
@@ -36,7 +36,26 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT


@dataclasses.dataclass(frozen=True)
class TimestampAdd(base_ops.BinaryOp):
class TimedeltaFloorOp(base_ops.UnaryOp):
"""Floors the numeric value to the nearest integer and use it to represent a timedelta.

This operator is only meant to be used during expression tree rewrites. Do not use it anywhere else!
"""

name: typing.ClassVar[str] = "timedelta_floor"

def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
input_type = input_types[0]
if dtypes.is_numeric(input_type) or input_type is dtypes.TIMEDELTA_DTYPE:
return dtypes.TIMEDELTA_DTYPE
raise TypeError(f"unsupported type: {input_type}")


timedelta_floor_op = TimedeltaFloorOp()


@dataclasses.dataclass(frozen=True)
class TimestampAddOp(base_ops.BinaryOp):
name: typing.ClassVar[str] = "timestamp_add"

def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
@@ -57,10 +76,10 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
)


timestamp_add_op = TimestampAdd()
timestamp_add_op = TimestampAddOp()


class TimestampSub(base_ops.BinaryOp):
class TimestampSubOp(base_ops.BinaryOp):
name: typing.ClassVar[str] = "timestamp_sub"

def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
@@ -76,4 +95,4 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
)


timestamp_sub_op = TimestampSub()
timestamp_sub_op = TimestampSubOp()
15 changes: 15 additions & 0 deletions tests/system/small/test_pandas.py
Original file line number Diff line number Diff line change
@@ -829,3 +829,18 @@ def test_to_timedelta_with_bf_series_invalid_unit(session, unit):
@pytest.mark.parametrize("input", [1, 1.2, "1s"])
def test_to_timedelta_non_bf_series(input):
assert bpd.to_timedelta(input) == pd.to_timedelta(input)


def test_to_timedelta_on_timedelta_series__should_be_no_op(scalars_dfs):
bf_df, pd_df = scalars_dfs
bf_series = bpd.to_timedelta(bf_df["int64_too"], unit="us")
pd_series = pd.to_timedelta(pd_df["int64_too"], unit="us")

actual_result = (
bpd.to_timedelta(bf_series, unit="s").to_pandas().astype("timedelta64[ns]")
)

expected_result = pd.to_timedelta(pd_series, unit="s")
pd.testing.assert_series_equal(
actual_result, expected_result, check_index_type=False
)