Skip to content

Commit

Permalink
Add additional Cloud Datastore operators (#10032)
Browse files Browse the repository at this point in the history
This PR adds more operators for Google Cloud Datastore
service. It also adds missing tests and how-to guides.
  • Loading branch information
turbaszek authored Aug 3, 2020
1 parent d0776cf commit 6efa1b9
Show file tree
Hide file tree
Showing 10 changed files with 835 additions and 10 deletions.
105 changes: 103 additions & 2 deletions airflow/providers/google/cloud/example_dags/example_datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@
"""

import os
from typing import Any, Dict

from airflow import models
from airflow.providers.google.cloud.operators.datastore import (
CloudDatastoreAllocateIdsOperator, CloudDatastoreBeginTransactionOperator, CloudDatastoreCommitOperator,
CloudDatastoreExportEntitiesOperator, CloudDatastoreImportEntitiesOperator,
CloudDatastoreRollbackOperator, CloudDatastoreRunQueryOperator,
)
from airflow.utils import dates

Expand All @@ -37,20 +40,118 @@
"example_gcp_datastore",
schedule_interval=None, # Override to match your needs
start_date=dates.days_ago(1),
tags=['example'],
tags=["example"],
) as dag:
# [START how_to_export_task]
export_task = CloudDatastoreExportEntitiesOperator(
task_id="export_task",
bucket=BUCKET,
project_id=GCP_PROJECT_ID,
overwrite_existing=True,
)
# [END how_to_export_task]

# [START how_to_import_task]
import_task = CloudDatastoreImportEntitiesOperator(
task_id="import_task",
bucket="{{ task_instance.xcom_pull('export_task')['response']['outputUrl'].split('/')[2] }}",
file="{{ '/'.join(task_instance.xcom_pull('export_task')['response']['outputUrl'].split('/')[3:]) }}",
project_id=GCP_PROJECT_ID
project_id=GCP_PROJECT_ID,
)
# [END how_to_import_task]

export_task >> import_task

# [START how_to_keys_def]
KEYS = [
{
"partitionId": {"projectId": GCP_PROJECT_ID, "namespaceId": ""},
"path": {"kind": "airflow"},
}
]
# [END how_to_keys_def]

# [START how_to_transaction_def]
TRANSACTION_OPTIONS: Dict[str, Any] = {"readWrite": {}}
# [END how_to_transaction_def]

# [START how_to_commit_def]
COMMIT_BODY = {
"mode": "TRANSACTIONAL",
"mutations": [
{
"insert": {
"key": KEYS[0],
"properties": {"string": {"stringValue": "airflow is awesome!"}},
}
}
],
"transaction": "{{ task_instance.xcom_pull('begin_transaction_commit') }}",
}
# [END how_to_commit_def]

# [START how_to_query_def]
QUERY = {
"partitionId": {"projectId": GCP_PROJECT_ID, "namespaceId": ""},
"readOptions": {
"transaction": "{{ task_instance.xcom_pull('begin_transaction_query') }}"
},
"query": {},
}
# [END how_to_query_def]

with models.DAG(
"example_gcp_datastore_operations",
start_date=dates.days_ago(1),
schedule_interval=None, # Override to match your needs
tags=["example"],
) as dag2:
# [START how_to_allocate_ids]
allocate_ids = CloudDatastoreAllocateIdsOperator(
task_id="allocate_ids", partial_keys=KEYS, project_id=GCP_PROJECT_ID
)
# [END how_to_allocate_ids]

# [START how_to_begin_transaction]
begin_transaction_commit = CloudDatastoreBeginTransactionOperator(
task_id="begin_transaction_commit",
transaction_options=TRANSACTION_OPTIONS,
project_id=GCP_PROJECT_ID,
)
# [END how_to_begin_transaction]

# [START how_to_commit_task]
commit_task = CloudDatastoreCommitOperator(
task_id="commit_task", body=COMMIT_BODY, project_id=GCP_PROJECT_ID
)
# [END how_to_commit_task]

allocate_ids >> begin_transaction_commit >> commit_task

begin_transaction_query = CloudDatastoreBeginTransactionOperator(
task_id="begin_transaction_query",
transaction_options=TRANSACTION_OPTIONS,
project_id=GCP_PROJECT_ID,
)

# [START how_to_run_query]
run_query = CloudDatastoreRunQueryOperator(
task_id="run_query", body=QUERY, project_id=GCP_PROJECT_ID
)
# [END how_to_run_query]

allocate_ids >> begin_transaction_query >> run_query

begin_transaction_to_rollback = CloudDatastoreBeginTransactionOperator(
task_id="begin_transaction_to_rollback",
transaction_options=TRANSACTION_OPTIONS,
project_id=GCP_PROJECT_ID,
)

# [START how_to_rollback_transaction]
rollback_transaction = CloudDatastoreRollbackOperator(
task_id="rollback_transaction",
transaction="{{ task_instance.xcom_pull('begin_transaction_to_rollback') }}",
)
begin_transaction_to_rollback >> rollback_transaction
# [END how_to_rollback_transaction]
6 changes: 4 additions & 2 deletions airflow/providers/google/cloud/hooks/datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def allocate_ids(self, partial_keys: List, project_id: str) -> List:
return resp['keys']

@GoogleBaseHook.fallback_to_default_project_id
def begin_transaction(self, project_id: str) -> str:
def begin_transaction(self, project_id: str, transaction_options: Dict[str, Any]) -> str:
"""
Begins a new transaction.
Expand All @@ -109,14 +109,16 @@ def begin_transaction(self, project_id: str) -> str:
:param project_id: Google Cloud Platform project ID against which to make the request.
:type project_id: str
:param transaction_options: Options for a new transaction.
:type transaction_options: Dict[str, Any]
:return: a transaction handle.
:rtype: str
"""
conn = self.get_conn() # type: Any

resp = (conn # pylint: disable=no-member
.projects()
.beginTransaction(projectId=project_id, body={})
.beginTransaction(projectId=project_id, body={"transactionOptions": transaction_options})
.execute(num_retries=self.num_retries))

return resp['transaction']
Expand Down
Loading

0 comments on commit 6efa1b9

Please sign in to comment.