|
| 1 | +# Copyright 2024 Google LLC |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +"""Module for bigquery continuous queries""" |
| 16 | + |
| 17 | +import json |
| 18 | +from typing import Optional |
| 19 | + |
| 20 | +from google.cloud import bigquery |
| 21 | + |
| 22 | +import bigframes |
| 23 | + |
| 24 | + |
def to_bigtable(
    query: str,
    instance: str,
    table: str,
    bq_client: Optional[bigquery.Client] = None,
    app_profile: Optional[str] = None,
    truncate: bool = False,
    overwrite: bool = False,
    auto_create_column_families: bool = False,
    bigtable_options: Optional[dict] = None,
    job_id: Optional[str] = None,
    job_id_prefix: Optional[str] = None,
) -> bigquery.QueryJob:
    """Launch a BigQuery continuous query that exports to Bigtable.

    The given query is wrapped in an ``EXPORT DATA`` statement with
    ``format = 'CLOUD_BIGTABLE'`` and submitted as a query job whose
    ``continuous`` flag is set. The resulting QueryJob is returned so
    the caller can manage the job.

    This method requires an existing bigtable preconfigured to
    accept the continuous query export statement. For instructions
    on export to bigtable, see
    https://cloud.google.com/bigquery/docs/export-to-bigtable.

    Note that ``query``, ``instance``, ``table`` and ``app_profile``
    are interpolated into the SQL text verbatim (no quoting or
    escaping is applied), so they must come from a trusted source.

    Args:
        query (str):
            The sql statement to execute as a continuous query.
            For example: "SELECT * FROM dataset.table"
            This will be wrapped in an EXPORT DATA statement to
            launch a continuous query writing to bigtable.
        instance (str):
            The name of the bigtable instance to export to.
        table (str):
            The name of the bigtable table to export to.
        bq_client (google.cloud.bigquery.Client, default None):
            The Client object used to submit the query. Its
            ``project`` is also used to build the bigtable URI.
            If None, the ``bqclient`` of the bigframes global
            session is used.
        app_profile (str, default None):
            The bigtable app profile to export to. If None, no app
            profile segment is added to the URI.
        truncate (bool, default False):
            The export truncate option, see
            https://cloud.google.com/bigquery/docs/reference/standard-sql/other-statements#bigtable_export_option
        overwrite (bool, default False):
            The export overwrite option, see
            https://cloud.google.com/bigquery/docs/reference/standard-sql/other-statements#bigtable_export_option
        auto_create_column_families (bool, default False):
            The auto_create_column_families option, see
            https://cloud.google.com/bigquery/docs/reference/standard-sql/other-statements#bigtable_export_option
        bigtable_options (dict, default None):
            The bigtable options dict, which will be converted to JSON
            using json.dumps, see
            https://cloud.google.com/bigquery/docs/reference/standard-sql/other-statements#bigtable_export_option
            If None, no bigtable_options parameter will be passed.
        job_id (str, default None):
            If specified, replace the default job id for the query,
            see job_id parameter of
            https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client#google_cloud_bigquery_client_Client_query
        job_id_prefix (str, default None):
            If specified, a job id prefix for the query, see
            job_id_prefix parameter of
            https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client#google_cloud_bigquery_client_Client_query

    Returns:
        google.cloud.bigquery.QueryJob:
            See https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob
            The ongoing query job can be managed using this object.
            For example, the job can be cancelled or its error status
            can be examined.
    """
    # Fall back to the global bigframes session's client when none is given.
    if bq_client is None:
        bq_client = bigframes.get_global_session().bqclient

    # The app profile, when present, is an extra path segment in the URI.
    app_profile_url_string = (
        "" if app_profile is None else f"appProfiles/{app_profile}/"
    )
    uri = (
        "https://bigtable.googleapis.com/"
        f"projects/{bq_client.project}/instances/{instance}/"
        f"{app_profile_url_string}tables/{table}"
    )

    # bigtable_options is passed as a triple-quoted SQL string holding JSON;
    # the whole option line is omitted when no options were supplied.
    bigtable_options_parameter_string = ""
    if bigtable_options is not None:
        bigtable_options_parameter_string = (
            f'bigtable_options = """{json.dumps(bigtable_options)}""",\n'
        )

    # Python renders the boolean flags as "True"/"False" in the statement.
    sql = (
        "EXPORT DATA\n"
        "OPTIONS (\n"
        "format = 'CLOUD_BIGTABLE',\n"
        f"{bigtable_options_parameter_string}"
        f"truncate = {truncate},\n"
        f"overwrite = {overwrite},\n"
        f"auto_create_column_families = {auto_create_column_families},\n"
        f'uri = "{uri}"\n'
        ")\n"
        "AS (\n"
        f"{query});"
    )

    # The "continuous" flag is set through the raw API representation;
    # from_api_repr is a classmethod, so no throwaway instance is needed.
    job_config = bigquery.job.QueryJobConfig.from_api_repr(
        {"query": {"continuous": True}}
    )

    # Start the query job and hand it back for lifetime management.
    return bq_client.query(
        sql,
        job_config=job_config,  # type:ignore
        # typing error above is in bq client library
        # (should accept abstract job_config, only takes concrete)
        job_id=job_id,
        job_id_prefix=job_id_prefix,
    )
0 commit comments