Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 4e8e97d

Browse files
authored Mar 19, 2024
feat: set force=True by default in DataFrame.peek() (#469)
1 parent 73fe0f8 commit 4e8e97d

File tree

2 files changed

+10
-10
lines changed

2 files changed

+10
-10
lines changed
 

‎bigframes/dataframe.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -1086,19 +1086,19 @@ def head(self, n: int = 5) -> DataFrame:
10861086
def tail(self, n: int = 5) -> DataFrame:
10871087
return typing.cast(DataFrame, self.iloc[-n:])
10881088

1089-
def peek(self, n: int = 5, *, force: bool = False) -> pandas.DataFrame:
1089+
def peek(self, n: int = 5, *, force: bool = True) -> pandas.DataFrame:
10901090
"""
10911091
Preview n arbitrary rows from the dataframe. No guarantees about row selection or ordering.
1092-
DataFrame.peek(force=False) will always be very fast, but will not succeed if data requires
1093-
full data scanning. Using force=True will always succeed, but may be perform expensive
1094-
computations.
1092+
``DataFrame.peek(force=False)`` will always be very fast, but will not succeed if data requires
1093+
full data scanning. Using ``force=True`` will always succeed, but may perform queries.
1094+
Query results will be cached so that future steps will benefit from these queries.
10951095
10961096
Args:
10971097
n (int, default 5):
10981098
The number of rows to select from the dataframe. Which N rows are returned is non-deterministic.
1099-
force (bool, default False):
1099+
force (bool, default True):
11001100
If the data cannot be peeked efficiently, the dataframe will instead be fully materialized as part
1101-
of the operation if force=True. If force=False, the operation will throw a ValueError.
1101+
of the operation if ``force=True``. If ``force=False``, the operation will raise a ValueError.
11021102
Returns:
11031103
pandas.DataFrame: A pandas DataFrame with n rows.
11041104

‎tests/system/small/test_dataframe.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -429,14 +429,14 @@ def test_rename(scalars_dfs):
429429

430430
def test_df_peek(scalars_dfs):
431431
scalars_df, scalars_pandas_df = scalars_dfs
432-
peek_result = scalars_df.peek(n=3)
432+
peek_result = scalars_df.peek(n=3, force=False)
433433
pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns)
434434
assert len(peek_result) == 3
435435

436436

437437
def test_df_peek_filtered(scalars_dfs):
438438
scalars_df, scalars_pandas_df = scalars_dfs
439-
peek_result = scalars_df[scalars_df.int64_col != 0].peek(n=3)
439+
peek_result = scalars_df[scalars_df.int64_col != 0].peek(n=3, force=False)
440440
pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns)
441441
assert len(peek_result) == 3
442442

@@ -449,9 +449,9 @@ def test_df_peek_exception(scalars_dfs):
449449
scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3, force=False)
450450

451451

452-
def test_df_peek_force(scalars_dfs):
452+
def test_df_peek_force_default(scalars_dfs):
453453
scalars_df, scalars_pandas_df = scalars_dfs
454-
peek_result = scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3, force=True)
454+
peek_result = scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3)
455455
pd.testing.assert_index_equal(
456456
scalars_pandas_df[["int64_col", "int64_too"]].columns, peek_result.columns
457457
)

0 commit comments

Comments
 (0)
Failed to load comments.