Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 8fc26c4

Browse files
authored Apr 12, 2024
docs: add docs for DataFrame and Series dunder methods (#562)
* docs: add docs for `DataFrame.{radd,__add__,__radd__}`
* fix rendering, revert ineffective changes, add __eq__
* newline
* docs for more df dunders
* fix mypy errors and couple of wordings
* fix sub and rmod, add docs for __bool__, __nonzero__, __getattr__
* add documentation for Series dunders
* fix doctest failure with python 3.12
* move docstrings to third_party for compliance safety
* add DataFrame.__getitem__ docstring and code samples
* add dunder doc overrides from third_party
1 parent 4ec8034 commit 8fc26c4

File tree

5 files changed

+1803
-75
lines changed

5 files changed

+1803
-75
lines changed
 

‎bigframes/dataframe.py

+132-28
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
from __future__ import annotations
1818

1919
import datetime
20+
import inspect
2021
import re
2122
import sys
2223
import textwrap
@@ -314,6 +315,8 @@ def __len__(self):
314315
rows, _ = self.shape
315316
return rows
316317

318+
__len__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__len__)
319+
317320
def __iter__(self):
318321
return iter(self.columns)
319322

@@ -466,7 +469,6 @@ def __getitem__(
466469
bigframes.series.Series,
467470
],
468471
): # No return type annotations (like pandas) as type cannot always be determined statically
469-
"""Gets the specified column(s) from the DataFrame."""
470472
# NOTE: This implements the operations described in
471473
# https://pandas.pydata.org/docs/getting_started/intro_tutorials/03_subset_data.html
472474

@@ -498,6 +500,8 @@ def __getitem__(
498500

499501
return DataFrame(self._block.select_columns(selected_ids))
500502

503+
__getitem__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__getitem__)
504+
501505
def _getitem_label(self, key: blocks.Label):
502506
col_ids = self._block.cols_matching_label(key)
503507
if len(col_ids) == 0:
@@ -642,14 +646,11 @@ def _repr_html_(self) -> str:
642646
return html_string
643647

644648
def __setitem__(self, key: str, value: SingleItemValue):
645-
"""Modify or insert a column into the DataFrame.
646-
647-
Note: This does **not** modify the original table the DataFrame was
648-
derived from.
649-
"""
650649
df = self._assign_single_item(key, value)
651650
self._set_block(df._get_block())
652651

652+
__setitem__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__setitem__)
653+
653654
def _apply_binop(
654655
self,
655656
other: float | int | bigframes.series.Series | DataFrame,
@@ -838,32 +839,50 @@ def _apply_dataframe_binop(
838839
def eq(self, other: typing.Any, axis: str | int = "columns") -> DataFrame:
839840
return self._apply_binop(other, ops.eq_op, axis=axis)
840841

842+
def __eq__(self, other) -> DataFrame: # type: ignore
843+
return self.eq(other)
844+
845+
__eq__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__eq__)
846+
841847
def ne(self, other: typing.Any, axis: str | int = "columns") -> DataFrame:
842848
return self._apply_binop(other, ops.ne_op, axis=axis)
843849

844-
__eq__ = eq # type: ignore
850+
def __ne__(self, other) -> DataFrame: # type: ignore
851+
return self.ne(other)
845852

846-
__ne__ = ne # type: ignore
853+
__ne__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__ne__)
847854

848855
def le(self, other: typing.Any, axis: str | int = "columns") -> DataFrame:
849856
return self._apply_binop(other, ops.le_op, axis=axis)
850857

858+
def __le__(self, other) -> DataFrame:
859+
return self.le(other)
860+
861+
__le__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__le__)
862+
851863
def lt(self, other: typing.Any, axis: str | int = "columns") -> DataFrame:
852864
return self._apply_binop(other, ops.lt_op, axis=axis)
853865

866+
def __lt__(self, other) -> DataFrame:
867+
return self.lt(other)
868+
869+
__lt__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__lt__)
870+
854871
def ge(self, other: typing.Any, axis: str | int = "columns") -> DataFrame:
855872
return self._apply_binop(other, ops.ge_op, axis=axis)
856873

857-
def gt(self, other: typing.Any, axis: str | int = "columns") -> DataFrame:
858-
return self._apply_binop(other, ops.gt_op, axis=axis)
874+
def __ge__(self, other) -> DataFrame:
875+
return self.ge(other)
859876

860-
__lt__ = lt
877+
__ge__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__ge__)
861878

862-
__le__ = le
879+
def gt(self, other: typing.Any, axis: str | int = "columns") -> DataFrame:
880+
return self._apply_binop(other, ops.gt_op, axis=axis)
863881

864-
__gt__ = gt
882+
def __gt__(self, other) -> DataFrame:
883+
return self.gt(other)
865884

866-
__ge__ = ge
885+
__gt__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__gt__)
867886

868887
def add(
869888
self,
@@ -874,7 +893,21 @@ def add(
874893
# TODO(swast): Support level parameter with MultiIndex.
875894
return self._apply_binop(other, ops.add_op, axis=axis)
876895

877-
__radd__ = __add__ = radd = add
896+
def radd(
897+
self,
898+
other: float | int | bigframes.series.Series | DataFrame,
899+
axis: str | int = "columns",
900+
) -> DataFrame:
901+
# TODO(swast): Support fill_value parameter.
902+
# TODO(swast): Support level parameter with MultiIndex.
903+
return self.add(other, axis=axis)
904+
905+
def __add__(self, other) -> DataFrame:
906+
return self.add(other)
907+
908+
__add__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__add__)
909+
910+
__radd__ = __add__
878911

879912
def sub(
880913
self,
@@ -883,7 +916,13 @@ def sub(
883916
) -> DataFrame:
884917
return self._apply_binop(other, ops.sub_op, axis=axis)
885918

886-
__sub__ = subtract = sub
919+
subtract = sub
920+
subtract.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.sub)
921+
922+
def __sub__(self, other):
923+
return self.sub(other)
924+
925+
__sub__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__sub__)
887926

888927
def rsub(
889928
self,
@@ -892,7 +931,10 @@ def rsub(
892931
) -> DataFrame:
893932
return self._apply_binop(other, ops.sub_op, axis=axis, reverse=True)
894933

895-
__rsub__ = rsub
934+
def __rsub__(self, other):
935+
return self.rsub(other)
936+
937+
__rsub__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__rsub__)
896938

897939
def mul(
898940
self,
@@ -901,7 +943,25 @@ def mul(
901943
) -> DataFrame:
902944
return self._apply_binop(other, ops.mul_op, axis=axis)
903945

904-
__rmul__ = __mul__ = rmul = multiply = mul
946+
multiply = mul
947+
multiply.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.mul)
948+
949+
def __mul__(self, other):
950+
return self.mul(other)
951+
952+
__mul__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__mul__)
953+
954+
def rmul(
955+
self,
956+
other: float | int | bigframes.series.Series | DataFrame,
957+
axis: str | int = "columns",
958+
) -> DataFrame:
959+
return self.mul(other, axis=axis)
960+
961+
def __rmul__(self, other):
962+
return self.rmul(other)
963+
964+
__rmul__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__rmul__)
905965

906966
def truediv(
907967
self,
@@ -910,7 +970,13 @@ def truediv(
910970
) -> DataFrame:
911971
return self._apply_binop(other, ops.div_op, axis=axis)
912972

913-
div = divide = __truediv__ = truediv
973+
truediv.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.truediv)
974+
div = divide = truediv
975+
976+
def __truediv__(self, other):
977+
return self.truediv(other)
978+
979+
__truediv__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__truediv__)
914980

915981
def rtruediv(
916982
self,
@@ -919,7 +985,13 @@ def rtruediv(
919985
) -> DataFrame:
920986
return self._apply_binop(other, ops.div_op, axis=axis, reverse=True)
921987

922-
__rtruediv__ = rdiv = rtruediv
988+
rdiv = rtruediv
989+
rdiv.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.rtruediv)
990+
991+
def __rtruediv__(self, other):
992+
return self.rtruediv(other)
993+
994+
__rtruediv__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__rtruediv__)
923995

924996
def floordiv(
925997
self,
@@ -928,7 +1000,10 @@ def floordiv(
9281000
) -> DataFrame:
9291001
return self._apply_binop(other, ops.floordiv_op, axis=axis)
9301002

931-
__floordiv__ = floordiv
1003+
def __floordiv__(self, other):
1004+
return self.floordiv(other)
1005+
1006+
__floordiv__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__floordiv__)
9321007

9331008
def rfloordiv(
9341009
self,
@@ -937,31 +1012,48 @@ def rfloordiv(
9371012
) -> DataFrame:
9381013
return self._apply_binop(other, ops.floordiv_op, axis=axis, reverse=True)
9391014

940-
__rfloordiv__ = rfloordiv
1015+
def __rfloordiv__(self, other):
1016+
return self.rfloordiv(other)
1017+
1018+
__rfloordiv__.__doc__ = inspect.getdoc(
1019+
vendored_pandas_frame.DataFrame.__rfloordiv__
1020+
)
9411021

9421022
def mod(self, other: int | bigframes.series.Series | DataFrame, axis: str | int = "columns") -> DataFrame: # type: ignore
9431023
return self._apply_binop(other, ops.mod_op, axis=axis)
9441024

1025+
def __mod__(self, other):
1026+
return self.mod(other)
1027+
1028+
__mod__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__mod__)
1029+
9451030
def rmod(self, other: int | bigframes.series.Series | DataFrame, axis: str | int = "columns") -> DataFrame: # type: ignore
9461031
return self._apply_binop(other, ops.mod_op, axis=axis, reverse=True)
9471032

948-
__mod__ = mod
1033+
def __rmod__(self, other):
1034+
return self.rmod(other)
9491035

950-
__rmod__ = rmod
1036+
__rmod__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__rmod__)
9511037

9521038
def pow(
9531039
self, other: int | bigframes.series.Series, axis: str | int = "columns"
9541040
) -> DataFrame:
9551041
return self._apply_binop(other, ops.pow_op, axis=axis)
9561042

1043+
def __pow__(self, other):
1044+
return self.pow(other)
1045+
1046+
__pow__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__pow__)
1047+
9571048
def rpow(
9581049
self, other: int | bigframes.series.Series, axis: str | int = "columns"
9591050
) -> DataFrame:
9601051
return self._apply_binop(other, ops.pow_op, axis=axis, reverse=True)
9611052

962-
__pow__ = pow
1053+
def __rpow__(self, other):
1054+
return self.rpow(other)
9631055

964-
__rpow__ = rpow
1056+
__rpow__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__rpow__)
9651057

9661058
def align(
9671059
self,
@@ -1971,6 +2063,7 @@ def prod(
19712063
return bigframes.series.Series(block.select_column("values"))
19722064

19732065
product = prod
2066+
product.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.prod)
19742067

19752068
def count(self, *, numeric_only: bool = False) -> bigframes.series.Series:
19762069
if not numeric_only:
@@ -2010,6 +2103,7 @@ def agg(
20102103
)
20112104

20122105
aggregate = agg
2106+
aggregate.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.agg)
20132107

20142108
def idxmin(self) -> bigframes.series.Series:
20152109
return bigframes.series.Series(block_ops.idxmin(self._block))
@@ -2083,6 +2177,7 @@ def kurt(self, *, numeric_only: bool = False):
20832177
return bigframes.series.Series(result_block)
20842178

20852179
kurtosis = kurt
2180+
kurtosis.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.kurt)
20862181

20872182
def _pivot(
20882183
self,
@@ -2542,11 +2637,13 @@ def isna(self) -> DataFrame:
25422637
return self._apply_unary_op(ops.isnull_op)
25432638

25442639
isnull = isna
2640+
isnull.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.isna)
25452641

25462642
def notna(self) -> DataFrame:
25472643
return self._apply_unary_op(ops.notnull_op)
25482644

25492645
notnull = notna
2646+
notnull.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.notna)
25502647

25512648
def cumsum(self):
25522649
is_numeric_types = [
@@ -2860,7 +2957,10 @@ def to_numpy(
28602957
) -> numpy.ndarray:
28612958
return self.to_pandas().to_numpy(dtype, copy, na_value, **kwargs)
28622959

2863-
__array__ = to_numpy
2960+
def __array__(self, dtype=None) -> numpy.ndarray:
2961+
return self.to_numpy(dtype=dtype)
2962+
2963+
__array__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__array__)
28642964

28652965
def to_parquet(
28662966
self,
@@ -3227,6 +3327,7 @@ def first_valid_index(self):
32273327
return
32283328

32293329
applymap = map
3330+
applymap.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.map)
32303331

32313332
def _slice(
32323333
self,
@@ -3367,4 +3468,7 @@ def get_right_id(id):
33673468
def plot(self):
33683469
return plotting.PlotAccessor(self)
33693470

3370-
__matmul__ = dot
3471+
def __matmul__(self, other) -> DataFrame:
3472+
return self.dot(other)
3473+
3474+
__matmul__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__matmul__)
There was a problem loading the remainder of the diff.

0 commit comments

Comments
 (0)
Failed to load comments.