Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 86e0f38

Browse files
authored Apr 10, 2024
feat: Add hasnans, combine_first, update to Series (#600)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #<issue_number_goes_here> 🦕
1 parent 5d0f149 commit 86e0f38

File tree

4 files changed

+236
-2
lines changed

4 files changed

+236
-2
lines changed
 

‎bigframes/core/convert.py

+27-1
Original file line number | Diff line number | Diff line change
@@ -13,13 +13,27 @@
1313
# limitations under the License.
1414
from __future__ import annotations
1515

16+
from typing import Optional
17+
1618
import pandas as pd
1719

1820
import bigframes.core.indexes as index
1921
import bigframes.series as series
2022

2123

22-
def to_bf_series(obj, default_index: index.Index) -> series.Series:
24+
def to_bf_series(obj, default_index: Optional[index.Index]) -> series.Series:
25+
"""
26+
Convert an object to a bigframes Series.
27+
28+
Args:
29+
obj (list-like or Series):
30+
Object to convert to bigframes Series
31+
default_index (list-like or Index or None):
32+
Index to use if obj has no index
33+
34+
Returns:
35+
bigframes.pandas.Series
36+
"""
2337
if isinstance(obj, series.Series):
2438
return obj
2539
if isinstance(obj, pd.Series):
@@ -35,6 +49,18 @@ def to_bf_series(obj, default_index: index.Index) -> series.Series:
3549

3650

3751
def to_pd_series(obj, default_index: pd.Index) -> pd.Series:
52+
"""
53+
Convert an object to a pandas Series.
54+
55+
Args:
56+
obj (list-like or Series):
57+
Object to convert to pandas Series
58+
default_index (list-like or Index or None):
59+
Index to use if obj has no index
60+
61+
Returns:
62+
pandas.Series
63+
"""
3864
if isinstance(obj, series.Series):
3965
return obj.to_pandas()
4066
if isinstance(obj, pd.Series):

‎bigframes/series.py

+20-1
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@
2222
import os
2323
import textwrap
2424
import typing
25-
from typing import Any, Literal, Mapping, Optional, Tuple, Union
25+
from typing import Any, Literal, Mapping, Optional, Sequence, Tuple, Union
2626

2727
import bigframes_vendored.pandas.core.series as vendored_pandas_series
2828
import google.cloud.bigquery as bigquery
@@ -130,6 +130,11 @@ def ndim(self) -> int:
130130
def empty(self) -> bool:
131131
return self.shape[0] == 0
132132

133+
@property
134+
def hasnans(self) -> bool:
135+
# Note, hasnans is actually a null check, and NaNs don't count for nullable float
136+
return self.isnull().any()
137+
133138
@property
134139
def values(self) -> numpy.ndarray:
135140
return self.to_numpy()
@@ -753,6 +758,20 @@ def __matmul__(self, other):
753758

754759
dot = __matmul__
755760

761+
def combine_first(self, other: Series) -> Series:
762+
result = self._apply_binary_op(other, ops.coalesce_op)
763+
result.name = self.name
764+
return result
765+
766+
def update(self, other: Union[Series, Sequence, Mapping]) -> None:
767+
import bigframes.core.convert
768+
769+
other = bigframes.core.convert.to_bf_series(other, default_index=None)
770+
result = self._apply_binary_op(
771+
other, ops.coalesce_op, reverse=True, alignment="left"
772+
)
773+
self._set_block(result._get_block())
774+
756775
def abs(self) -> Series:
757776
return self._apply_unary_op(ops.abs_op)
758777

‎tests/system/small/test_series.py

+51
Original file line number | Diff line number | Diff line change
@@ -1261,6 +1261,39 @@ def test_binop_right_filtered(scalars_dfs):
12611261
)
12621262

12631263

1264+
@skip_legacy_pandas
1265+
def test_series_combine_first(scalars_dfs):
1266+
scalars_df, scalars_pandas_df = scalars_dfs
1267+
int64_col = scalars_df["int64_col"].head(7)
1268+
float64_col = scalars_df["float64_col"].tail(7)
1269+
bf_result = int64_col.combine_first(float64_col).to_pandas()
1270+
1271+
pd_int64_col = scalars_pandas_df["int64_col"].head(7)
1272+
pd_float64_col = scalars_pandas_df["float64_col"].tail(7)
1273+
pd_result = pd_int64_col.combine_first(pd_float64_col)
1274+
1275+
assert_series_equal(
1276+
bf_result,
1277+
pd_result,
1278+
)
1279+
1280+
1281+
def test_series_update(scalars_dfs):
1282+
scalars_df, scalars_pandas_df = scalars_dfs
1283+
int64_col = scalars_df["int64_col"].head(7)
1284+
float64_col = scalars_df["float64_col"].tail(7).copy()
1285+
float64_col.update(int64_col)
1286+
1287+
pd_int64_col = scalars_pandas_df["int64_col"].head(7)
1288+
pd_float64_col = scalars_pandas_df["float64_col"].tail(7).copy()
1289+
pd_float64_col.update(pd_int64_col)
1290+
1291+
assert_series_equal(
1292+
float64_col.to_pandas(),
1293+
pd_float64_col,
1294+
)
1295+
1296+
12641297
def test_mean(scalars_dfs):
12651298
scalars_df, scalars_pandas_df = scalars_dfs
12661299
col_name = "int64_col"
@@ -1649,6 +1682,24 @@ def test_size(scalars_dfs):
16491682
assert pd_result == bf_result
16501683

16511684

1685+
def test_series_hasnans_true(scalars_dfs):
1686+
scalars_df, scalars_pandas_df = scalars_dfs
1687+
1688+
bf_result = scalars_df["string_col"].hasnans
1689+
pd_result = scalars_pandas_df["string_col"].hasnans
1690+
1691+
assert pd_result == bf_result
1692+
1693+
1694+
def test_series_hasnans_false(scalars_dfs):
1695+
scalars_df, scalars_pandas_df = scalars_dfs
1696+
1697+
bf_result = scalars_df["string_col"].dropna().hasnans
1698+
pd_result = scalars_pandas_df["string_col"].dropna().hasnans
1699+
1700+
assert pd_result == bf_result
1701+
1702+
16521703
def test_empty_false(scalars_dfs):
16531704
scalars_df, scalars_pandas_df = scalars_dfs
16541705

‎third_party/bigframes_vendored/pandas/core/series.py

+138
Original file line number | Diff line number | Diff line change
@@ -175,6 +175,31 @@ def name(self) -> Hashable:
175175
"""
176176
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
177177

178+
@property
179+
def hasnans(self) -> bool:
180+
"""
181+
Return True if there are any NaNs.
182+
183+
**Examples:**
184+
185+
>>> import bigframes.pandas as bpd
186+
>>> bpd.options.display.progress_bar = None
187+
188+
>>> s = bpd.Series([1, 2, 3, None])
189+
>>> s
190+
0 1.0
191+
1 2.0
192+
2 3.0
193+
3 <NA>
194+
dtype: Float64
195+
>>> s.hasnans
196+
True
197+
198+
Returns:
199+
bool
200+
"""
201+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
202+
178203
@property
179204
def T(self) -> Series:
180205
"""Return the transpose, which is by definition self.
@@ -2343,6 +2368,119 @@ def rdivmod(self, other) -> Series:
23432368
"""
23442369
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
23452370

2371+
def combine_first(self, other) -> Series:
2372+
"""
2373+
Update null elements with value in the same location in 'other'.
2374+
2375+
Combine two Series objects by filling null values in one Series with
2376+
non-null values from the other Series. Result index will be the union
2377+
of the two indexes.
2378+
2379+
**Examples:**
2380+
>>> import bigframes.pandas as bpd
2381+
>>> import numpy as np
2382+
>>> bpd.options.display.progress_bar = None
2383+
2384+
>>> s1 = bpd.Series([1, np.nan])
2385+
>>> s2 = bpd.Series([3, 4, 5])
2386+
>>> s1.combine_first(s2)
2387+
0 1.0
2388+
1 4.0
2389+
2 5.0
2390+
dtype: Float64
2391+
2392+
Null values still persist if the location of that null value
2393+
does not exist in `other`.
2394+
2395+
>>> s1 = bpd.Series({'falcon': np.nan, 'eagle': 160.0})
2396+
>>> s2 = bpd.Series({'eagle': 200.0, 'duck': 30.0})
2397+
>>> s1.combine_first(s2)
2398+
falcon <NA>
2399+
eagle 160.0
2400+
duck 30.0
2401+
dtype: Float64
2402+
2403+
Args:
2404+
other (Series):
2405+
The value(s) to be used for filling null values.
2406+
2407+
Returns:
2408+
Series: The result of combining the provided Series with the other object.
2409+
"""
2410+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
2411+
2412+
def update(self, other) -> None:
2413+
"""
2414+
Modify Series in place using values from passed Series.
2415+
2416+
Uses non-NA values from passed Series to make updates. Aligns
2417+
on index.
2418+
2419+
**Examples:**
2420+
>>> import bigframes.pandas as bpd
2421+
>>> import pandas as pd
2422+
>>> import numpy as np
2423+
>>> bpd.options.display.progress_bar = None
2424+
2425+
>>> s = bpd.Series([1, 2, 3])
2426+
>>> s.update(bpd.Series([4, 5, 6]))
2427+
>>> s
2428+
0 4
2429+
1 5
2430+
2 6
2431+
dtype: Int64
2432+
2433+
>>> s = bpd.Series(['a', 'b', 'c'])
2434+
>>> s.update(bpd.Series(['d', 'e'], index=[0, 2]))
2435+
>>> s
2436+
0 d
2437+
1 b
2438+
2 e
2439+
dtype: string
2440+
2441+
>>> s = bpd.Series([1, 2, 3])
2442+
>>> s.update(bpd.Series([4, 5, 6, 7, 8]))
2443+
>>> s
2444+
0 4
2445+
1 5
2446+
2 6
2447+
dtype: Int64
2448+
2449+
If ``other`` contains NaNs the corresponding values are not updated
2450+
in the original Series.
2451+
2452+
>>> s = bpd.Series([1, 2, 3])
2453+
>>> s.update(bpd.Series([4, np.nan, 6], dtype=pd.Int64Dtype()))
2454+
>>> s
2455+
0 4
2456+
1 2
2457+
2 6
2458+
dtype: Int64
2459+
2460+
``other`` can also be a non-Series object type
2461+
that is coercible into a Series
2462+
2463+
>>> s = bpd.Series([1, 2, 3])
2464+
>>> s.update([4, np.nan, 6])
2465+
>>> s
2466+
0 4.0
2467+
1 2.0
2468+
2 6.0
2469+
dtype: Float64
2470+
2471+
>>> s = bpd.Series([1, 2, 3])
2472+
>>> s.update({1: 9})
2473+
>>> s
2474+
0 1
2475+
1 9
2476+
2 3
2477+
dtype: Int64
2478+
2479+
Args:
2480+
other (Series, or object coercible into Series)
2481+
"""
2482+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
2483+
23462484
def all(
23472485
self,
23482486
):

0 commit comments

Comments
 (0)
Failed to load comments.