Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 2dd01c2

Browse files
authoredMar 21, 2024
feat: support Series.dt.floor (#493)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #<issue_number_goes_here> 🦕
1 parent 3e3329a commit 2dd01c2

File tree

5 files changed

+83
-0
lines changed

5 files changed

+83
-0
lines changed
 

‎bigframes/core/compile/scalar_op_compiler.py

+20
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,26 @@ def strftime_op_impl(x: ibis_types.Value, op: ops.StrftimeOp):
622622
)
623623

624624

625+
@scalar_op_compiler.register_unary_op(ops.FloorDtOp, pass_op=True)
def floor_dt_op_impl(x: ibis_types.Value, op: ops.FloorDtOp):
    """Compile ``FloorDtOp`` by truncating ``x`` to the unit named by ``op.freq``.

    Args:
        x: Ibis value to floor; it is treated as a timestamp value for the
            truncation call.
        op: The floor operation carrying the pandas-style ``freq`` string.

    Returns:
        The truncated value, cast back to the type ``x`` had on entry.

    Raises:
        NotImplementedError: If ``op.freq`` is not one of the supported
            frequency strings.
    """
    supported_freqs = ["Y", "Q", "M", "W", "D", "h", "min", "s", "ms", "us", "ns"]
    # Only "min" is translated (to "m") before truncating; every other
    # supported freq string is passed through unchanged.
    pandas_to_ibis_freqs = {"min": "m"}
    if op.freq not in supported_freqs:
        raise NotImplementedError(
            f"Unsupported freq parameter: {op.freq}."
            + " Supported freq parameters are: "
            + ",".join(supported_freqs)
        )
    ibis_freq = pandas_to_ibis_freqs.get(op.freq, op.freq)
    # Remember the incoming type so the result can be cast back to it after
    # truncation.
    result_type = x.type()
    result = typing.cast(ibis_types.TimestampValue, x)
    result = result.truncate(ibis_freq)
    return result.cast(result_type)
643+
644+
625645
@scalar_op_compiler.register_unary_op(ops.time_op)
626646
def time_op_impl(x: ibis_types.Value):
627647
return typing.cast(ibis_types.TimestampValue, x).time()

‎bigframes/operations/__init__.py

+9
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,15 @@ def output_type(self, *input_types):
441441
return dtypes.STRING_DTYPE
442442

443443

444+
@dataclasses.dataclass(frozen=True)
class FloorDtOp(UnaryOp):
    """Unary operation that floors its input to the frequency given by ``freq``."""

    # Frequency string to floor to, as supplied by the caller (e.g. "D", "min").
    freq: str
    # Class-level identifier for this operation (not a dataclass field).
    name: typing.ClassVar[str] = "floor_dt"

    def output_type(self, *input_types):
        """Return the type of the first input unchanged."""
        return input_types[0]
451+
452+
444453
# Binary Ops
445454
fillna_op = create_binary_op(name="fillna")
446455
cliplower_op = create_binary_op(name="clip_lower")

‎bigframes/operations/datetimes.py

+3
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,6 @@ def strftime(self, date_format: str) -> series.Series:
9797

9898
def normalize(self) -> series.Series:
    # Delegates to the shared unary-op path with the predefined normalize op.
    normalize_operation = ops.normalize_op
    return self._apply_unary_op(normalize_operation)
100+
101+
def floor(self, freq: str) -> series.Series:
    # Build the parameterized floor op for this freq, then hand it to the
    # shared unary-op path.
    floor_operation = ops.FloorDtOp(freq=freq)
    return self._apply_unary_op(floor_operation)

‎tests/system/small/operations/test_datetimes.py

+21
Original file line numberDiff line numberDiff line change
@@ -282,3 +282,24 @@ def test_dt_normalize(scalars_dfs, col_name):
282282
pd_result.astype(scalars_df[col_name].dtype), # normalize preserves type
283283
bf_result,
284284
)
285+
286+
287+
@pytest.mark.parametrize(
    ("col_name", "freq"),
    [
        ("timestamp_col", "D"),
        ("timestamp_col", "min"),
        ("datetime_col", "s"),
        ("datetime_col", "us"),
    ],
)
@skip_legacy_pandas
def test_dt_floor(scalars_dfs, col_name, freq):
    """Compare ``dt.floor`` on the BigFrames column against the pandas column."""
    bf_frame, pd_frame = scalars_dfs
    bf_column = bf_frame[col_name]

    actual = bf_column.dt.floor(freq).to_pandas()
    expected = pd_frame[col_name].dt.floor(freq)

    # floor preserves type, so align the pandas result to the BigFrames dtype
    # before comparing.
    expected_aligned = expected.astype(bf_column.dtype)
    assert_series_equal(expected_aligned, actual)

‎third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py

+30
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,33 @@ def normalize(self):
6767
bigframes.series.Series of the same dtype as the data.
6868
"""
6969
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
70+
71+
def floor(self, freq: str):
    """
    Perform floor operation on the data to the specified freq.

    Supported freq arguments are: 'Y' (year), 'Q' (quarter), 'M'
    (month), 'W' (week), 'D' (day), 'h' (hour), 'min' (minute), 's'
    (second), 'ms' (millisecond), 'us' (microsecond), 'ns' (nanosecond)

    Behavior around clock changes (i.e. daylight savings) is determined
    by the SQL engine, so "ambiguous" and "nonexistent" parameters are not
    supported. Y, Q, M, and W freqs are not supported by pandas as of
    version 2.2, but have been added here due to backend support.

    **Examples:**

        >>> import pandas as pd
        >>> import bigframes.pandas as bpd
        >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min')
        >>> bpd.Series(rng).dt.floor("h")
        0    2018-01-01 11:00:00
        1    2018-01-01 12:00:00
        2    2018-01-01 12:00:00
        dtype: timestamp[us][pyarrow]

    Args:
        freq (str):
            Frequency string (e.g. "D", "min", "s").

    Returns:
        bigframes.series.Series of the same dtype as the data.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

0 commit comments

Comments
 (0)
Failed to load comments.