googleapis · Nov 19, 2024 · Nov 20, 2024 · Nov 20, 2024 · Nov 21, 2024 · Nov 21, 2024
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -27,15 +27,13 @@ If you are still having issues, please be sure to include as much information as
 import sys
 import bigframes
 import google.cloud.bigquery
-import ibis
 import pandas
 import pyarrow
 import sqlglot
 
 print(f"Python: {sys.version}")
 print(f"bigframes=={bigframes.__version__}")
 print(f"google-cloud-bigquery=={google.cloud.bigquery.__version__}")
-print(f"ibis=={ibis.__version__}")
 print(f"pandas=={pandas.__version__}")
 print(f"pyarrow=={pyarrow.__version__}")
 print(f"sqlglot=={sqlglot.__version__}")

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,43 @@
 
 [1]: https://pypi.org/project/bigframes/#history
 
+## [1.28.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.27.0...v1.28.0) (2024-12-11)
+
+
+### Features
+
+* (Series | DataFrame).plot.bar ([#1152](https://github.com/googleapis/python-bigquery-dataframes/issues/1152)) ([0fae2e0](https://github.com/googleapis/python-bigquery-dataframes/commit/0fae2e0291ec8d22341b5b543e8f1b384f83cd3c))
+* `bigframes.bigquery.vector_search` supports `use_brute_force` and `fraction_lists_to_search` parameters ([#1158](https://github.com/googleapis/python-bigquery-dataframes/issues/1158)) ([131edc3](https://github.com/googleapis/python-bigquery-dataframes/commit/131edc3d79f46d35a25422f0db7f150e63e8f561))
+* Add `ARIMAPlus.predict_explain()` to generate forecasts with explanation columns ([#1177](https://github.com/googleapis/python-bigquery-dataframes/issues/1177)) ([05f8b4d](https://github.com/googleapis/python-bigquery-dataframes/commit/05f8b4d2b2b5f624097228e65a3c42364fc40d36))
+* Add client_endpoints_override to bq options ([#1167](https://github.com/googleapis/python-bigquery-dataframes/issues/1167)) ([be74b99](https://github.com/googleapis/python-bigquery-dataframes/commit/be74b99977cfbd513def5b7e439de6b7706c0712))
+* Add support for temporal types in dataframe's describe() method ([#1189](https://github.com/googleapis/python-bigquery-dataframes/issues/1189)) ([2d564a6](https://github.com/googleapis/python-bigquery-dataframes/commit/2d564a6a9925b69c7e9a15b532fb66ad68c3e264))
+* Allow join-free alignment of analytic expressions ([#1168](https://github.com/googleapis/python-bigquery-dataframes/issues/1168)) ([daef4f0](https://github.com/googleapis/python-bigquery-dataframes/commit/daef4f0c7c5ff2d0a4e9a6ffefeb81f43780ac8b))
+* Series.isin supports bigframes.Series arg ([#1195](https://github.com/googleapis/python-bigquery-dataframes/issues/1195)) ([0d8a16b](https://github.com/googleapis/python-bigquery-dataframes/commit/0d8a16ba77a66dce544d0a7cf411fca0adc2a694))
+* Update llm.TextEmbeddingGenerator to 005 ([#1186](https://github.com/googleapis/python-bigquery-dataframes/issues/1186)) ([3072d38](https://github.com/googleapis/python-bigquery-dataframes/commit/3072d382c6ff57bdb37d7e080c794c67dbf6e701))
+
+
+### Bug Fixes
+
+* Fix error loading local dataframes into bigquery ([#1165](https://github.com/googleapis/python-bigquery-dataframes/issues/1165)) ([5b355ef](https://github.com/googleapis/python-bigquery-dataframes/commit/5b355efde122ed76b1cff39900ab8f94f5a13a30))
+* Fix null index join with 'on' arg ([#1153](https://github.com/googleapis/python-bigquery-dataframes/issues/1153)) ([9015c33](https://github.com/googleapis/python-bigquery-dataframes/commit/9015c33e73675ebb2299487dce3295732ea0527e))
+* Fix series.isin using local path always ([#1202](https://github.com/googleapis/python-bigquery-dataframes/issues/1202)) ([a44eafd](https://github.com/googleapis/python-bigquery-dataframes/commit/a44eafdd95eb1b994dc82411640b61fd0a78a492))
+
+
+### Performance Improvements
+
+* Update df.corr and df.cov to handle the case of more than 30 columns ([#1161](https://github.com/googleapis/python-bigquery-dataframes/issues/1161)) ([9dcf1aa](https://github.com/googleapis/python-bigquery-dataframes/commit/9dcf1aa918919704dcf4d12b05935b22fb502fc6))
+
+
+### Documentation
+
+* Add a code sample using `bpd.options.bigquery.ordering_mode = "partial"` ([#909](https://github.com/googleapis/python-bigquery-dataframes/issues/909)) ([f80d705](https://github.com/googleapis/python-bigquery-dataframes/commit/f80d70503b80559a0b1fe64434383aa3e028bf9b))
+* Add snippet for creating boosted tree model ([#1142](https://github.com/googleapis/python-bigquery-dataframes/issues/1142)) ([a972668](https://github.com/googleapis/python-bigquery-dataframes/commit/a972668833a454fb18e6cb148697165edd46e8cc))
+* Add snippet for evaluating a boosted tree model ([#1154](https://github.com/googleapis/python-bigquery-dataframes/issues/1154)) ([9d8970a](https://github.com/googleapis/python-bigquery-dataframes/commit/9d8970ac1f18b2520a061ac743e767ca8593cc8c))
+* Add snippet for predicting classifications using a boosted tree model ([#1156](https://github.com/googleapis/python-bigquery-dataframes/issues/1156)) ([e7b83f1](https://github.com/googleapis/python-bigquery-dataframes/commit/e7b83f166ef56e631120050103c2f43f454fce44))
+* Add third-party `pandas.Index` methods and docstrings ([#1171](https://github.com/googleapis/python-bigquery-dataframes/issues/1171)) ([a970294](https://github.com/googleapis/python-bigquery-dataframes/commit/a9702945286fbe500ade4d0f0c14cc60a8aa00eb))
+* Fix Bigframes.Pandas.General_Function missing docs ([#1164](https://github.com/googleapis/python-bigquery-dataframes/issues/1164)) ([de923d0](https://github.com/googleapis/python-bigquery-dataframes/commit/de923d01b904b96cc51dfd526b6a412f28ff10c4))
+* Update `bigframes.pandas.Index` docstrings ([#1144](https://github.com/googleapis/python-bigquery-dataframes/issues/1144)) ([557ab8d](https://github.com/googleapis/python-bigquery-dataframes/commit/557ab8df526fcf743af0a609ec7ec636b00d0c0b))
+
 ## [1.27.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.26.0...v1.27.0) (2024-11-16)
 
 

diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py
@@ -91,6 +91,7 @@ def __init__(
         skip_bq_connection_check: bool = False,
         *,
         ordering_mode: Literal["strict", "partial"] = "strict",
+        client_endpoints_override: dict = {},
     ):
         self._credentials = credentials
         self._project = project
@@ -103,6 +104,7 @@ def __init__(
         self._session_started = False
         # Determines the ordering strictness for the session.
         self._ordering_mode = _validate_ordering_mode(ordering_mode)
+        self._client_endpoints_override = client_endpoints_override
 
     @property
     def application_name(self) -> Optional[str]:
@@ -317,3 +319,21 @@ def ordering_mode(self) -> Literal["strict", "partial"]:
     @ordering_mode.setter
     def ordering_mode(self, ordering_mode: Literal["strict", "partial"]) -> None:
         self._ordering_mode = _validate_ordering_mode(ordering_mode)
+
+    @property
+    def client_endpoints_override(self) -> dict:
+        """Option that sets the BQ client endpoints addresses directly as a dict. Possible keys are "bqclient", "bqconnectionclient", "bqstoragereadclient"."""
+        return self._client_endpoints_override
+
+    @client_endpoints_override.setter
+    def client_endpoints_override(self, value: dict):
+        warnings.warn(
+            "This is an advanced configuration option for directly setting endpoints. Incorrect use may lead to unexpected behavior or system instability. Proceed only if you fully understand its implications."
+        )
+
+        if self._session_started and self._client_endpoints_override != value:
+            raise ValueError(
+                SESSION_STARTED_MESSAGE.format(attribute="client_endpoints_override")
+            )
+
+        self._client_endpoints_override = value
diff --git a/bigframes/_config/experiment_options.py b/bigframes/_config/experiment_options.py
@@ -21,8 +21,8 @@ class ExperimentOptions:
     """
 
     def __init__(self):
-        self._semantic_operators = False
-        self._blob = False
+        self._semantic_operators: bool = False
+        self._blob: bool = False
 
     @property
     def semantic_operators(self) -> bool:

diff --git a/bigframes/bigquery/_operations/search.py b/bigframes/bigquery/_operations/search.py
@@ -18,7 +18,6 @@
 import typing
 from typing import Collection, Literal, Mapping, Optional, Union
 
-import bigframes_vendored.constants as constants
 import google.cloud.bigquery as bigquery
 
 import bigframes.core.sql
@@ -96,10 +95,10 @@ def vector_search(
     query: Union[dataframe.DataFrame, series.Series],
     *,
     query_column_to_search: Optional[str] = None,
-    top_k: Optional[int] = 10,
-    distance_type: Literal["euclidean", "cosine"] = "euclidean",
+    top_k: Optional[int] = None,
+    distance_type: Optional[Literal["euclidean", "cosine", "dot_product"]] = None,
     fraction_lists_to_search: Optional[float] = None,
-    use_brute_force: bool = False,
+    use_brute_force: Optional[bool] = None,
 ) -> dataframe.DataFrame:
     """
     Conduct vector search which searches embeddings to find semantically similar entities.
@@ -141,7 +140,8 @@ def vector_search(
         ...             base_table="bigframes-dev.bigframes_tests_sys.base_table",
         ...             column_to_search="my_embedding",
         ...             query=search_query,
-        ...             top_k=2)
+        ...             top_k=2,
+        ...             use_brute_force=True)
              embedding  id my_embedding  distance
         dog    [1. 2.]   1      [1. 2.]       0.0
         cat  [3.  5.2]   5    [5.  5.4]  2.009975
@@ -185,17 +185,18 @@ def vector_search(
             find nearest neighbors. The column must have a type of ``ARRAY<FLOAT64>``. All elements in
             the array must be non-NULL and all values in the column must have the same array dimensions
             as the values in the ``column_to_search`` column. Can only be set when query is a DataFrame.
-        top_k (int, default 10):
+        top_k (int):
             Sepecifies the number of nearest neighbors to return. Default to 10.
         distance_type (str, defalt "euclidean"):
             Specifies the type of metric to use to compute the distance between two vectors.
-            Possible values are "euclidean" and "cosine". Default to "euclidean".
+            Possible values are "euclidean", "cosine" and "dot_product".
+            Default to "euclidean".
         fraction_lists_to_search (float, range in [0.0, 1.0]):
             Specifies the percentage of lists to search. Specifying a higher percentage leads to
             higher recall and slower performance, and the converse is true when specifying a lower
             percentage. It is only used when a vector index is also used. You can only specify
             ``fraction_lists_to_search`` when ``use_brute_force`` is set to False.
-        use_brute_force (bool, default False):
+        use_brute_force (bool):
             Determines whether to use brute force search by skipping the vector index if one is available.
             Default to False.
 
@@ -204,37 +205,35 @@ def vector_search(
     """
     import bigframes.series
 
-    if not fraction_lists_to_search and use_brute_force is True:
-        raise ValueError(
-            "You can't specify fraction_lists_to_search when use_brute_force is set to True."
-        )
     if (
         isinstance(query, bigframes.series.Series)
         and query_column_to_search is not None
     ):
         raise ValueError(
             "You can't specify query_column_to_search when query is a Series."
         )
-    # TODO(ashleyxu): Support options in vector search. b/344019989
-    if fraction_lists_to_search is not None or use_brute_force is True:
-        raise NotImplementedError(
-            f"fraction_lists_to_search and use_brute_force is not supported. {constants.FEEDBACK_LINK}"
-        )
-    options = {
-        "base_table": base_table,
-        "column_to_search": column_to_search,
-        "query_column_to_search": query_column_to_search,
-        "distance_type": distance_type,
-        "top_k": top_k,
-        "fraction_lists_to_search": fraction_lists_to_search,
-        "use_brute_force": use_brute_force,
-    }
 
-    (query,) = utils.convert_to_dataframe(query)
+    # Only populate options if not set to the default value.
+    # This avoids accidentally setting options that are mutually exclusive.
+    options = None
+    if fraction_lists_to_search is not None:
+        options = {} if options is None else options
+        options["fraction_lists_to_search"] = fraction_lists_to_search
+    if use_brute_force is not None:
+        options = {} if options is None else options
+        options["use_brute_force"] = use_brute_force
+
+    (query,) = utils.batch_convert_to_dataframe(query)
     sql_string, index_col_ids, index_labels = query._to_sql_query(include_index=True)
 
     sql = bigframes.core.sql.create_vector_search_sql(
-        sql_string=sql_string, options=options  # type: ignore
+        sql_string=sql_string,
+        base_table=base_table,
+        column_to_search=column_to_search,
+        query_column_to_search=query_column_to_search,
+        top_k=top_k,
+        distance_type=distance_type,
+        options=options,
     )
     if index_col_ids is not None:
         df = query._session.read_gbq(sql, index_col=index_col_ids)

diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py
@@ -26,7 +26,6 @@
 import pyarrow as pa
 import pyarrow.feather as pa_feather
 
-import bigframes.core.compile
 import bigframes.core.expression as ex
 import bigframes.core.guid
 import bigframes.core.identifiers as ids
@@ -35,15 +34,13 @@
 import bigframes.core.nodes as nodes
 from bigframes.core.ordering import OrderingExpression
 import bigframes.core.ordering as orderings
-import bigframes.core.rewrite
 import bigframes.core.schema as schemata
 import bigframes.core.tree_properties
 import bigframes.core.utils
 from bigframes.core.window_spec import WindowSpec
 import bigframes.dtypes
 import bigframes.operations as ops
 import bigframes.operations.aggregations as agg_ops
-import bigframes.session._io.bigquery
 
 if typing.TYPE_CHECKING:
     from bigframes.session import Session
@@ -199,6 +196,8 @@ def as_cached(
 
     def _try_evaluate_local(self):
         """Use only for unit testing paths - not fully featured. Will throw exception if fails."""
+        import bigframes.core.compile
+
         return bigframes.core.compile.test_only_try_evaluate(self.node)
 
     def get_column_type(self, key: str) -> bigframes.dtypes.Dtype:
@@ -422,22 +421,7 @@ def relational_join(
         l_mapping = {  # Identity mapping, only rename right side
             lcol.name: lcol.name for lcol in self.node.ids
         }
-        r_mapping = {  # Rename conflicting names
-            rcol.name: rcol.name
-            if (rcol.name not in l_mapping)
-            else bigframes.core.guid.generate_guid()
-            for rcol in other.node.ids
-        }
-        other_node = other.node
-        if set(other_node.ids) & set(self.node.ids):
-            other_node = nodes.SelectionNode(
-                other_node,
-                tuple(
-                    (ex.deref(old_id), ids.ColumnId(new_id))
-                    for old_id, new_id in r_mapping.items()
-                ),
-            )
-
+        other_node, r_mapping = self.prepare_join_names(other)
         join_node = nodes.JoinNode(
             left_child=self.node,
             right_child=other_node,
@@ -449,14 +433,63 @@ def relational_join(
         )
         return ArrayValue(join_node), (l_mapping, r_mapping)
 
-    def try_align_as_projection(
+    def try_row_join(
+        self,
+        other: ArrayValue,
+        conditions: typing.Tuple[typing.Tuple[str, str], ...] = (),
+    ) -> Optional[
+        typing.Tuple[ArrayValue, typing.Tuple[dict[str, str], dict[str, str]]]
+    ]:
+        l_mapping = {  # Identity mapping, only rename right side
+            lcol.name: lcol.name for lcol in self.node.ids
+        }
+        other_node, r_mapping = self.prepare_join_names(other)
+        import bigframes.core.rewrite
+
+        result_node = bigframes.core.rewrite.try_join_as_projection(
+            self.node, other_node, conditions
+        )
+        if result_node is None:
+            return None
+
+        return (
+            ArrayValue(result_node),
+            (l_mapping, r_mapping),
+        )
+
+    def prepare_join_names(
+        self, other: ArrayValue
+    ) -> Tuple[bigframes.core.nodes.BigFrameNode, dict[str, str]]:
+        if set(other.node.ids) & set(self.node.ids):
+            r_mapping = {  # Rename conflicting names
+                rcol.name: rcol.name
+                if (rcol.name not in self.column_ids)
+                else bigframes.core.guid.generate_guid()
+                for rcol in other.node.ids
+            }
+            return (
+                nodes.SelectionNode(
+                    other.node,
+                    tuple(
+                        (ex.deref(old_id), ids.ColumnId(new_id))
+                        for old_id, new_id in r_mapping.items()
+                    ),
+                ),
+                r_mapping,
+            )
+        else:
+            return other.node, {id: id for id in other.column_ids}
+
+    def try_legacy_row_join(
         self,
         other: ArrayValue,
         join_type: join_def.JoinType,
         join_keys: typing.Tuple[join_def.CoalescedColumnMapping, ...],
         mappings: typing.Tuple[join_def.JoinColumnMapping, ...],
     ) -> typing.Optional[ArrayValue]:
-        result = bigframes.core.rewrite.join_as_projection(
+        import bigframes.core.rewrite
+
+        result = bigframes.core.rewrite.legacy_join_as_projection(
             self.node, other.node, join_keys, mappings, join_type
         )
         if result is not None:
@@ -488,11 +521,4 @@ def _gen_namespaced_uid(self) -> str:
         return self._gen_namespaced_uids(1)[0]
 
     def _gen_namespaced_uids(self, n: int) -> List[str]:
-        i = len(self.node.defined_variables)
-        genned_ids: List[str] = []
-        while len(genned_ids) < n:
-            attempted_id = f"col_{i}"
-            if attempted_id not in self.node.defined_variables:
-                genned_ids.append(attempted_id)
-            i = i + 1
-        return genned_ids
+        return [ids.ColumnId.unique().name for _ in range(n)]