Skip to content

Commit

Permalink
Add autodetect arg in BQCreateExternalTable Operator (#22710)
Browse files Browse the repository at this point in the history
* Add autodetect parameter

* Update docstring

* Update google provider documentation
  • Loading branch information
hankehly authored Apr 4, 2022
1 parent 215993b commit f9e1847
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 0 deletions.
7 changes: 7 additions & 0 deletions airflow/providers/google/cloud/operators/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,9 @@ class BigQueryCreateExternalTableOperator(BaseOperator):
:param schema_object: If set, a GCS object path pointing to a .json file that
contains the schema for the table. (templated)
:param source_format: File format of the data.
:param autodetect: Try to detect schema and format options automatically.
    Explicitly specified schema_fields or schema_object options take precedence over auto-detection.
    https://cloud.google.com/bigquery/docs/schema-detect#schema_auto-detection_for_external_data_sources
:param compression: [Optional] The compression type of the data source.
Possible values include GZIP and NONE.
The default value is NONE.
Expand Down Expand Up @@ -1028,6 +1031,7 @@ def __init__(
schema_fields: Optional[List] = None,
schema_object: Optional[str] = None,
source_format: Optional[str] = None,
autodetect: bool = False,
compression: Optional[str] = None,
skip_leading_rows: Optional[int] = None,
field_delimiter: Optional[str] = None,
Expand Down Expand Up @@ -1057,6 +1061,7 @@ def __init__(
skip_leading_rows,
field_delimiter,
max_bad_records,
autodetect,
quote_character,
allow_quoted_newlines,
allow_jagged_rows,
Expand Down Expand Up @@ -1116,6 +1121,7 @@ def __init__(
self.bigquery_conn_id = bigquery_conn_id
self.google_cloud_storage_conn_id = google_cloud_storage_conn_id
self.delegate_to = delegate_to
self.autodetect = autodetect

self.src_fmt_configs = src_fmt_configs or {}
self.labels = labels
Expand Down Expand Up @@ -1153,6 +1159,7 @@ def execute(self, context: 'Context') -> None:
schema_fields=schema_fields,
source_uris=source_uris,
source_format=self.source_format,
autodetect=self.autodetect,
compression=self.compression,
skip_leading_rows=self.skip_leading_rows,
field_delimiter=self.field_delimiter,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,9 @@ Or you may point the operator to a Google Cloud Storage object name where the sc
:start-after: [START howto_operator_bigquery_create_table_schema_json]
:end-before: [END howto_operator_bigquery_create_table_schema_json]

To use BigQuery `schema auto-detection <https://cloud.google.com/bigquery/docs/schema-detect#schema_auto-detection_for_external_data_sources>`__,
set the ``autodetect`` parameter to ``True`` instead of providing explicit schema information.

.. _howto/operator:BigQueryGetDataOperator:

Fetch data from table
Expand Down
2 changes: 2 additions & 0 deletions tests/providers/google/cloud/operators/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ def test_execute(self, mock_hook):
bucket=TEST_GCS_BUCKET,
source_objects=TEST_GCS_DATA,
source_format=TEST_SOURCE_FORMAT,
autodetect=True,
)

operator.execute(None)
Expand All @@ -205,6 +206,7 @@ def test_execute(self, mock_hook):
schema_fields=[],
source_uris=[f'gs://{TEST_GCS_BUCKET}/{source_object}' for source_object in TEST_GCS_DATA],
source_format=TEST_SOURCE_FORMAT,
autodetect=True,
compression='NONE',
skip_leading_rows=0,
field_delimiter=',',
Expand Down

0 comments on commit f9e1847

Please sign in to comment.