googleapis · TrevorBergeron · Apr 10, 2024 · Apr 8, 2024 · Apr 8, 2024 · Apr 9, 2024
@@ -13,7 +13,9 @@
 # limitations under the License.
 
 from bigframes.core.indexes.base import Index
+from bigframes.core.indexes.multi import MultiIndex
 
 __all__ = [
     "Index",
+    "MultiIndex",
 ]
@@ -42,9 +42,15 @@
 
 class Index(vendored_pandas_index.Index):
     __doc__ = vendored_pandas_index.Index.__doc__
-
-    def __init__(
-        self,
+    _query_job = None
+    _block: blocks.Block
+    _linked_frame: Union[
+        bigframes.dataframe.DataFrame, bigframes.series.Series, None
+    ] = None
+
+    # Overrides __new__ so that subclasses (e.g. MultiIndex) can be created, like pandas does
+    def __new__(
+        cls,
         data=None,
         dtype=None,
         *,
@@ -73,18 +79,30 @@ def __init__(
             if dtype is not None:
                 index = index.astype(dtype)
             block = index._block
+        elif isinstance(data, pandas.Index):
+            pd_df = pandas.DataFrame(index=data)
+            block = df.DataFrame(pd_df, session=session)._block
         else:
             pd_index = pandas.Index(data=data, dtype=dtype, name=name)
             pd_df = pandas.DataFrame(index=pd_index)
             block = df.DataFrame(pd_df, session=session)._block
-        self._query_job = None
-        self._block: blocks.Block = block
+
+        # TODO: Support more index subtypes
+        from bigframes.core.indexes.multi import MultiIndex
+
+        klass = MultiIndex if len(block._index_columns) > 1 else cls
+        result = typing.cast(Index, object.__new__(klass))
+        result._query_job = None
+        result._block = block
+        return result
 
     @classmethod
     def from_frame(
         cls, frame: Union[bigframes.series.Series, bigframes.dataframe.DataFrame]
     ) -> Index:
-        return FrameIndex(frame)
+        # Build the index from the frame's block, then remember the frame on
+        # the index so that assigning to ``names`` can relabel the frame too.
+        index = Index(frame._block)
+        index._linked_frame = frame
+        return index
 
     @property
     def name(self) -> blocks.Label:
@@ -107,6 +125,10 @@ def names(self) -> typing.Sequence[blocks.Label]:
     @names.setter
     def names(self, values: typing.Sequence[blocks.Label]):
         new_block = self._block.with_index_labels(values)
+        # An index produced by from_frame keeps a reference to its source
+        # frame; apply the same labels to that frame's own block so the
+        # rename is reflected on the frame as well as on this index.
+        if self._linked_frame is not None:
+            self._linked_frame._set_block(
+                self._linked_frame._block.with_index_labels(values)
+            )
         self._block = new_block
 
     @property
@@ -452,26 +474,3 @@ def to_numpy(self, dtype=None, **kwargs) -> np.ndarray:
 
     def __len__(self):
+        # The length is the first entry of ``shape``.
         return self.shape[0]
-
-
-# Index that mutates the originating dataframe/series
-class FrameIndex(Index):
-    def __init__(
-        self,
-        series_or_dataframe: typing.Union[
-            bigframes.series.Series, bigframes.dataframe.DataFrame
-        ],
-    ):
-        super().__init__(series_or_dataframe._block)
-        self._whole_frame = series_or_dataframe
-
-    @property
-    def names(self) -> typing.Sequence[blocks.Label]:
-        """Returns the names of the Index."""
-        return self._block._index_labels
-
-    @names.setter
-    def names(self, values: typing.Sequence[blocks.Label]):
-        new_block = self._whole_frame._get_block().with_index_labels(values)
-        self._whole_frame._set_block(new_block)
-        self._block = new_block
@@ -0,0 +1,48 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import cast, Hashable, Iterable, Sequence
+
+import bigframes_vendored.pandas.core.indexes.multi as vendored_pandas_multindex
+import pandas
+
+from bigframes.core.indexes.base import Index
+
+
+class MultiIndex(Index, vendored_pandas_multindex.MultiIndex):
+    __doc__ = vendored_pandas_multindex.MultiIndex.__doc__
+
+    @classmethod
+    def from_tuples(
+        cls,
+        tuples: Iterable[tuple[Hashable, ...]],
+        sortorder: int | None = None,
+        names: Sequence[Hashable] | Hashable | None = None,
+    ) -> MultiIndex:
+        # Let pandas assemble the multi-level index locally first.
+        local_index = pandas.MultiIndex.from_tuples(
+            tuples, sortorder=sortorder, names=names
+        )
+        # Index.__new__ selects the MultiIndex class when the resulting block
+        # has more than one index column.
+        return cast(MultiIndex, Index(local_index))
+
+    @classmethod
+    def from_arrays(
+        cls,
+        arrays,
+        sortorder: int | None = None,
+        names=None,
+    ) -> MultiIndex:
+        # Let pandas assemble the multi-level index locally first.
+        local_index = pandas.MultiIndex.from_arrays(
+            arrays, sortorder=sortorder, names=names
+        )
+        # Index.__new__ selects the MultiIndex class when the resulting block
+        # has more than one index column.
+        return cast(MultiIndex, Index(local_index))
@@ -707,6 +707,7 @@ def to_datetime(
 # checking and docstrings.
 DataFrame = bigframes.dataframe.DataFrame
 Index = bigframes.core.indexes.Index
+MultiIndex = bigframes.core.indexes.MultiIndex
 Series = bigframes.series.Series
 
 # Other public pandas attributes
@@ -760,6 +761,7 @@ def to_datetime(
     # Class aliases
     "DataFrame",
     "Index",
+    "MultiIndex",
     "Series",
     # Other public pandas attributes
     "NamedAgg",

@@ -20,6 +20,31 @@
 from tests.system.utils import assert_pandas_df_equal, skip_legacy_pandas
 
 
+def test_multi_index_from_arrays():
+    # Build the level arrays and level names once so that both libraries are
+    # given exactly the same inputs.
+    level_arrays = [
+        pandas.Index([4, 99], dtype=pandas.Int64Dtype()),
+        pandas.Index(
+            [" Hello, World!", "_some_new_string"],
+            dtype=pandas.StringDtype(storage="pyarrow"),
+        ),
+    ]
+    level_names = [" 1index 1", "_1index 2"]
+
+    bf_idx = bpd.MultiIndex.from_arrays(level_arrays, names=level_names)
+    pd_idx = pandas.MultiIndex.from_arrays(level_arrays, names=level_names)
+
+    assert bf_idx.names == pd_idx.names
+    pandas.testing.assert_index_equal(bf_idx.to_pandas(), pd_idx)
+
+
 @skip_legacy_pandas
 def test_read_pandas_multi_index_axes():
     index = pandas.MultiIndex.from_arrays(

@@ -0,0 +1,88 @@
+# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/indexes/multi.py
+from __future__ import annotations
+
+from typing import Hashable, Iterable, Sequence
+
+import bigframes_vendored.pandas.core.indexes.base
+
+from bigframes import constants
+
+
+class MultiIndex(bigframes_vendored.pandas.core.indexes.base.Index):
+    """
+    A multi-level, or hierarchical, index object for pandas objects.
+    """
+
+    # The methods below are stubs: they carry the signature and the docstring
+    # only, and raise NotImplementedError when called directly. A subclass is
+    # expected to override them with a working implementation.
+
+    @classmethod
+    def from_tuples(
+        cls,
+        tuples: Iterable[tuple[Hashable, ...]],
+        sortorder: int | None = None,
+        names: Sequence[Hashable] | Hashable | None = None,
+    ) -> MultiIndex:
+        """
+        Convert list of tuples to MultiIndex.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> tuples = [(1, 'red'), (1, 'blue'),
+            ...           (2, 'red'), (2, 'blue')]
+            >>> bpd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
+            MultiIndex([(1,  'red'),
+                        (1, 'blue'),
+                        (2,  'red'),
+                        (2, 'blue')],
+                    names=['number', 'color'])
+
+        Args:
+            tuples (list / sequence of tuple-likes):
+                Each tuple is the index of one row/column.
+            sortorder (int or None):
+                Level of sortedness (must be lexicographically sorted by that
+                level).
+            names (list / sequence of str, optional):
+                Names for the levels in the index.
+
+        Returns:
+            MultiIndex
+        """
+        # Stub body: no implementation is provided at this level.
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    @classmethod
+    def from_arrays(
+        cls,
+        arrays,
+        sortorder: int | None = None,
+        names=None,
+    ) -> MultiIndex:
+        """
+        Convert arrays to MultiIndex.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
+            >>> bpd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
+            MultiIndex([(1,  'red'),
+                        (1, 'blue'),
+                        (2,  'red'),
+                        (2, 'blue')],
+                    names=['number', 'color'])
+
+        Args:
+            arrays (list / sequence of array-likes):
+                Each array-like gives one level's value for each data point.
+                len(arrays) is the number of levels.
+            sortorder (int or None):
+                Level of sortedness (must be lexicographically sorted by that
+                level).
+            names (list / sequence of str, optional):
+                Names for the levels in the index.
+
+        Returns:
+            MultiIndex
+        """
+        # Stub body: no implementation is provided at this level.
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)