
Refactor dataproc system tests (#40720)
VladaZakharova authored Jul 24, 2024
1 parent 10f2503 commit 95b5a0a
Showing 35 changed files with 264 additions and 2,431 deletions.
2 changes: 1 addition & 1 deletion airflow/providers/google/cloud/hooks/dataproc.py
@@ -274,7 +274,7 @@ def wait_for_operation(
self,
operation: Operation,
timeout: float | None = None,
result_retry: AsyncRetry | _MethodDefault = DEFAULT,
result_retry: AsyncRetry | _MethodDefault | Retry = DEFAULT,
) -> Any:
"""Wait for a long-lasting operation to complete."""
try:
2 changes: 1 addition & 1 deletion airflow/providers/google/cloud/operators/dataproc.py
@@ -2995,7 +2995,7 @@ def __init__(
metadata: Sequence[tuple[str, str]] = (),
gcp_conn_id: str = "google_cloud_default",
impersonation_chain: str | Sequence[str] | None = None,
result_retry: AsyncRetry | _MethodDefault = DEFAULT,
result_retry: AsyncRetry | _MethodDefault | Retry = DEFAULT,
asynchronous: bool = False,
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
polling_interval_seconds: int = 5,
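For context on the two hunks above: widening the annotation lets callers pass a synchronous google.api_core.retry.Retry as result_retry, not only an AsyncRetry or the DEFAULT sentinel. A minimal sketch under that assumption; the project, region, and batch values below are illustrative only, not taken from the commit:

from google.api_core.retry import Retry

from airflow.providers.google.cloud.operators.dataproc import DataprocCreateBatchOperator

# A synchronous Retry object is now accepted by result_retry,
# alongside AsyncRetry and the DEFAULT sentinel.
create_batch = DataprocCreateBatchOperator(
    task_id="create_batch_with_sync_retry",
    project_id="my-project",  # illustrative
    region="europe-west3",  # illustrative
    batch={
        "spark_batch": {
            "jar_file_uris": ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
            "main_class": "org.apache.spark.examples.SparkPi",
        },
    },
    batch_id="example-batch",
    result_retry=Retry(maximum=100.0, initial=10.0, multiplier=1.0),
)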
10 changes: 3 additions & 7 deletions airflow/providers/google/cloud/operators/dataproc_metastore.py
@@ -22,12 +22,12 @@
import time
from typing import TYPE_CHECKING, Sequence

from google.api_core.exceptions import AlreadyExists
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
from google.api_core.retry import Retry, exponential_sleep_generator
from google.cloud.metastore_v1 import MetadataExport, MetadataManagementActivity
from google.cloud.metastore_v1.types import Backup, MetadataImport, Service
from google.cloud.metastore_v1.types.metastore import DatabaseDumpSpec, Restore
from googleapiclient.errors import HttpError

from airflow.exceptions import AirflowException
from airflow.models import BaseOperator, BaseOperatorLink
@@ -242,9 +242,7 @@ def execute(self, context: Context) -> dict:
)
backup = hook.wait_for_operation(self.timeout, operation)
self.log.info("Backup %s created successfully", self.backup_id)
except HttpError as err:
if err.resp.status not in (409, "409"):
raise
except AlreadyExists:
self.log.info("Backup %s already exists", self.backup_id)
backup = hook.get_backup(
project_id=self.project_id,
@@ -448,9 +446,7 @@ def execute(self, context: Context) -> dict:
)
service = hook.wait_for_operation(self.timeout, operation)
self.log.info("Service %s created successfully", self.service_id)
except HttpError as err:
if err.resp.status not in (409, "409"):
raise
except AlreadyExists:
self.log.info("Instance %s already exists", self.service_id)
service = hook.get_service(
region=self.region,
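Read together, these hunks replace manual inspection of HttpError.resp.status for 409 with the typed AlreadyExists exception that google-api-core raises for HTTP 409 Conflict. A minimal standalone sketch of the idempotent-create pattern; the hook call signatures are assumed to match the operator code above and may need adjusting for the real DataprocMetastoreHook:

from google.api_core.exceptions import AlreadyExists

def create_or_get_service(hook, region: str, project_id: str, service: dict, service_id: str):
    """Create a metastore service, falling back to a lookup when it already exists."""
    try:
        operation = hook.create_service(
            region=region, project_id=project_id, service=service, service_id=service_id
        )
        # wait_for_operation(timeout, operation) mirrors the operator call above.
        return hook.wait_for_operation(None, operation)
    except AlreadyExists:
        # google-api-core maps HTTP 409 Conflict to AlreadyExists,
        # so no manual status-code check is needed.
        return hook.get_service(region=region, project_id=project_id, service_id=service_id)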
3 changes: 2 additions & 1 deletion scripts/ci/pre_commit/check_system_tests.py
@@ -35,6 +35,7 @@
errors: list[str] = []

WATCHER_APPEND_INSTRUCTION = "list(dag.tasks) >> watcher()"
WATCHER_APPEND_INSTRUCTION_SHORT = " >> watcher()"

PYTEST_FUNCTION = """
from tests.system.utils import get_test_run # noqa: E402
@@ -52,7 +53,7 @@
def _check_file(file: Path):
content = file.read_text()
if "from tests.system.utils.watcher import watcher" in content:
index = content.find(WATCHER_APPEND_INSTRUCTION)
index = content.find(WATCHER_APPEND_INSTRUCTION_SHORT)
if index == -1:
errors.append(
f"[red]The example {file} imports tests.system.utils.watcher "
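The effect of matching on the shorter suffix: a DAG that filters its task list before appending the watcher, as the batch example below does for its cancelled-batch sensor, still satisfies the pre-commit check. A minimal, self-contained sketch of the relaxed match:

WATCHER_APPEND_INSTRUCTION_SHORT = " >> watcher()"

def has_watcher_append(content: str) -> bool:
    # Any line that pipes tasks into watcher() matches, not only the exact
    # "list(dag.tasks) >> watcher()" form.
    return WATCHER_APPEND_INSTRUCTION_SHORT in content

assert has_watcher_append("list(dag.tasks) >> watcher()")
assert has_watcher_append(
    '[task for task in dag.tasks if task.task_id != "batch_cancelled_sensor"] >> watcher()'
)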
Changed file (path not shown): system test DAG "dataproc_batch"
@@ -23,7 +23,7 @@
import os
from datetime import datetime

from google.api_core.retry_async import AsyncRetry
from google.api_core.retry import Retry

from airflow.models.dag import DAG
from airflow.providers.google.cloud.operators.dataproc import (
@@ -37,10 +37,10 @@
from airflow.utils.trigger_rule import TriggerRule
from tests.system.providers.google import DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
DAG_ID = "dataproc_batch"
ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT") or DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID
REGION = "europe-west1"
DAG_ID = "dataproc_batch"
REGION = "europe-west3"

BATCH_ID = f"batch-{ENV_ID}-{DAG_ID}".replace("_", "-")
BATCH_ID_2 = f"batch-{ENV_ID}-{DAG_ID}-2".replace("_", "-")
@@ -77,7 +77,7 @@
region=REGION,
batch=BATCH_CONFIG,
batch_id=BATCH_ID_2,
result_retry=AsyncRetry(maximum=10.0, initial=10.0, multiplier=1.0),
result_retry=Retry(maximum=100.0, initial=10.0, multiplier=1.0),
)

create_batch_3 = DataprocCreateBatchOperator(
@@ -104,10 +104,6 @@
get_batch = DataprocGetBatchOperator(
task_id="get_batch", project_id=PROJECT_ID, region=REGION, batch_id=BATCH_ID
)

get_batch_2 = DataprocGetBatchOperator(
task_id="get_batch_2", project_id=PROJECT_ID, region=REGION, batch_id=BATCH_ID_2
)
# [END how_to_cloud_dataproc_get_batch_operator]

# [START how_to_cloud_dataproc_list_batches_operator]
@@ -136,6 +132,14 @@
)
# [END how_to_cloud_dataproc_cancel_operation_operator]

batch_cancelled_sensor = DataprocBatchSensor(
task_id="batch_cancelled_sensor",
region=REGION,
project_id=PROJECT_ID,
batch_id=BATCH_ID_4,
poke_interval=10,
)

# [START how_to_cloud_dataproc_delete_batch_operator]
delete_batch = DataprocDeleteBatchOperator(
task_id="delete_batch", project_id=PROJECT_ID, region=REGION, batch_id=BATCH_ID
@@ -153,25 +157,30 @@
delete_batch.trigger_rule = TriggerRule.ALL_DONE
delete_batch_2.trigger_rule = TriggerRule.ALL_DONE
delete_batch_3.trigger_rule = TriggerRule.ALL_DONE
delete_batch_4.trigger_rule = TriggerRule.ALL_DONE
delete_batch_4.trigger_rule = TriggerRule.ALL_FAILED

(
# TEST SETUP
[create_batch, create_batch_2, create_batch_3]
# TEST BODY
>> batch_async_sensor
>> [get_batch, get_batch_2, list_batches]
>> get_batch
>> list_batches
>> create_batch_4
>> cancel_operation
# TEST TEARDOWN
>> [delete_batch, delete_batch_2, delete_batch_3, delete_batch_4]
>> [delete_batch, delete_batch_2, delete_batch_3]
>> batch_cancelled_sensor
>> delete_batch_4
)

from tests.system.utils.watcher import watcher

# This test needs watcher in order to properly mark success/failure
# when "teardown" task with trigger rule is part of the DAG
list(dag.tasks) >> watcher()

# Excluding sensor because we expect it to fail due to cancelled operation
[task for task in dag.tasks if task.task_id != "batch_cancelled_sensor"] >> watcher()


from tests.system.utils import get_test_run # noqa: E402
Changed file (path not shown): system test DAG "dataproc_batch_deferrable"
@@ -37,7 +37,7 @@
ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
DAG_ID = "dataproc_batch_deferrable"
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT") or DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID
REGION = "europe-west1"
REGION = "europe-north1"
BATCH_ID = f"batch-{ENV_ID}-{DAG_ID}".replace("_", "-")
BATCH_CONFIG = {
"spark_batch": {
Changed file (path not shown): system test DAG "dataproc_batch_ps"
@@ -28,17 +28,21 @@
ClusterGenerator,
DataprocCreateBatchOperator,
DataprocCreateClusterOperator,
DataprocDeleteBatchOperator,
DataprocDeleteClusterOperator,
)
from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
from airflow.utils.trigger_rule import TriggerRule
from tests.system.providers.google import DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
DAG_ID = "dataproc_batch_ps"
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"
REGION = "europe-west1"
CLUSTER_NAME = f"cluster-{ENV_ID}-{DAG_ID}".replace("_", "-")
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT") or DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID
BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}".replace("-", "_")
REGION = "europe-north1"
CLUSTER_NAME_BASE = f"cluster-{DAG_ID}".replace("_", "-")
CLUSTER_NAME_FULL = CLUSTER_NAME_BASE + f"-{ENV_ID}".replace("_", "-")
CLUSTER_NAME = CLUSTER_NAME_BASE if len(CLUSTER_NAME_FULL) >= 33 else CLUSTER_NAME_FULL
BATCH_ID = f"batch-{ENV_ID}-{DAG_ID}".replace("_", "-")

CLUSTER_GENERATOR_CONFIG_FOR_PHS = ClusterGenerator(
@@ -98,6 +102,14 @@
)
# [END how_to_cloud_dataproc_create_batch_operator_with_persistent_history_server]

delete_batch = DataprocDeleteBatchOperator(
task_id="delete_batch",
project_id=PROJECT_ID,
region=REGION,
batch_id=BATCH_ID,
trigger_rule=TriggerRule.ALL_DONE,
)

delete_cluster = DataprocDeleteClusterOperator(
task_id="delete_cluster",
project_id=PROJECT_ID,
@@ -117,6 +129,7 @@
# TEST BODY
>> create_batch
# TEST TEARDOWN
>> delete_batch
>> delete_cluster
>> delete_bucket
)
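A note on the naming pattern introduced above: CLUSTER_NAME falls back to the short, environment-independent base name whenever the ENV_ID-qualified name would reach 33 characters, keeping generated Dataproc cluster names within the length these tests allow. A worked example with an illustrative ENV_ID, not a value from the commit:

# Illustrative values; ENV_ID normally comes from the SYSTEM_TESTS_ENV_ID variable.
DAG_ID = "dataproc_batch_ps"
ENV_ID = "some-long-ci-environment-id"

CLUSTER_NAME_BASE = f"cluster-{DAG_ID}".replace("_", "-")  # "cluster-dataproc-batch-ps"
CLUSTER_NAME_FULL = CLUSTER_NAME_BASE + f"-{ENV_ID}".replace("_", "-")  # 53 characters
CLUSTER_NAME = CLUSTER_NAME_BASE if len(CLUSTER_NAME_FULL) >= 33 else CLUSTER_NAME_FULL
print(CLUSTER_NAME)  # -> "cluster-dataproc-batch-ps"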
Changed file (path not shown): system test DAG "dataproc_create_existing_stopped_cluster"
@@ -32,14 +32,18 @@
DataprocStopClusterOperator,
)
from airflow.utils.trigger_rule import TriggerRule
from tests.system.providers.google import DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID

DAG_ID = "example_dataproc_cluster_create_existing_stopped_cluster"
DAG_ID = "dataproc_create_existing_stopped_cluster"

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
PROJECT_ID = os.environ.get("SYSTEMS_TESTS_GCP_PROJECTS") or ""
ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT") or DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID

CLUSTER_NAME = f"cluster-{ENV_ID}-{DAG_ID}".replace("_", "-")
REGION = "europe-west1"

CLUSTER_NAME_BASE = f"{DAG_ID}".replace("_", "-")
CLUSTER_NAME_FULL = CLUSTER_NAME_BASE + f"-{ENV_ID}".replace("_", "-")
CLUSTER_NAME = CLUSTER_NAME_BASE if len(CLUSTER_NAME_FULL) >= 33 else CLUSTER_NAME_FULL
REGION = "europe-north1"

# Cluster definition
CLUSTER_CONFIG = {
Changed file (path not shown): system test DAG "dataproc_cluster_def"
@@ -33,12 +33,14 @@
from airflow.utils.trigger_rule import TriggerRule
from tests.system.providers.google import DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
DAG_ID = "dataproc_cluster_def"
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT") or DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID

CLUSTER_NAME = f"cluster-{ENV_ID}-{DAG_ID}".replace("_", "-")
REGION = "europe-west1"
CLUSTER_NAME_BASE = f"cluster-{DAG_ID}".replace("_", "-")
CLUSTER_NAME_FULL = CLUSTER_NAME_BASE + f"-{ENV_ID}".replace("_", "-")
CLUSTER_NAME = CLUSTER_NAME_BASE if len(CLUSTER_NAME_FULL) >= 33 else CLUSTER_NAME_FULL
REGION = "europe-north1"


# Cluster definition
Changed file (path not shown): system test DAG "dataproc_diagnose_cluster"
@@ -33,12 +33,15 @@
from airflow.utils.trigger_rule import TriggerRule
from tests.system.providers.google import DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
DAG_ID = "dataproc_diagnose_cluster"
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT") or DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID

CLUSTER_NAME = f"cluster-{ENV_ID}-{DAG_ID}".replace("_", "-")
REGION = "europe-west1"
CLUSTER_NAME_BASE = f"cluster-{DAG_ID}".replace("_", "-")
CLUSTER_NAME_FULL = CLUSTER_NAME_BASE + f"-{ENV_ID}".replace("_", "-")
CLUSTER_NAME = CLUSTER_NAME_BASE if len(CLUSTER_NAME_FULL) >= 33 else CLUSTER_NAME_FULL

REGION = "europe-north1"


# Cluster definition
@@ -102,7 +105,7 @@
# TEST SETUP
create_cluster
# TEST BODY
>> diagnose_cluster
>> [diagnose_cluster, diagnose_cluster_deferrable]
# TEST TEARDOWN
>> delete_cluster
)
Changed file (path not shown): system test DAG "dataproc_cluster_generation"
@@ -24,33 +24,39 @@

import os
from datetime import datetime
from pathlib import Path

from airflow.models.dag import DAG
from airflow.providers.google.cloud.operators.dataproc import (
ClusterGenerator,
DataprocCreateClusterOperator,
DataprocDeleteClusterOperator,
)
from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
from airflow.providers.google.cloud.transfers.local_to_gcs import LocalFilesystemToGCSOperator
from airflow.providers.google.cloud.operators.gcs import (
GCSCreateBucketOperator,
GCSDeleteBucketOperator,
GCSSynchronizeBucketsOperator,
)
from airflow.utils.trigger_rule import TriggerRule
from tests.system.providers.google import DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
DAG_ID = "dataproc_cluster_generation"
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT") or DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID

BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"
CLUSTER_NAME = f"cluster-{ENV_ID}-{DAG_ID}".replace("_", "-")
REGION = "europe-west1"
ZONE = "europe-west1-b"
INIT_FILE_SRC = str(Path(__file__).parent / "resources" / "pip-install.sh")
RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
INIT_FILE = "pip-install.sh"
GCS_INIT_FILE = f"gs://{RESOURCE_DATA_BUCKET}/dataproc/{INIT_FILE}"

# Cluster definition: Generating Cluster Config for DataprocCreateClusterOperator
# [START how_to_cloud_dataproc_create_cluster_generate_cluster_config]
CLUSTER_NAME_BASE = f"cluster-{DAG_ID}".replace("_", "-")
CLUSTER_NAME_FULL = CLUSTER_NAME_BASE + f"-{ENV_ID}".replace("_", "-")
CLUSTER_NAME = CLUSTER_NAME_BASE if len(CLUSTER_NAME_FULL) >= 33 else CLUSTER_NAME_FULL

INIT_FILE = "pip-install.sh"
REGION = "us-east4"
ZONE = "us-east4-a"

# Cluster definition: Generating Cluster Config for DataprocCreateClusterOperator
# [START how_to_cloud_dataproc_create_cluster_generate_cluster_config]
CLUSTER_GENERATOR_CONFIG = ClusterGenerator(
project_id=PROJECT_ID,
zone=ZONE,
@@ -60,7 +66,7 @@
worker_disk_size=32,
num_workers=2,
storage_bucket=BUCKET_NAME,
init_actions_uris=[f"gs://{BUCKET_NAME}/{INIT_FILE}"],
init_actions_uris=[GCS_INIT_FILE],
metadata={"PIP_PACKAGES": "pyyaml requests pandas openpyxl"},
num_preemptible_workers=1,
preemptibility="PREEMPTIBLE",
@@ -80,11 +86,13 @@
task_id="create_bucket", bucket_name=BUCKET_NAME, project_id=PROJECT_ID
)

upload_file = LocalFilesystemToGCSOperator(
task_id="upload_file",
src=INIT_FILE_SRC,
dst=INIT_FILE,
bucket=BUCKET_NAME,
move_init_file = GCSSynchronizeBucketsOperator(
task_id="move_init_file",
source_bucket=RESOURCE_DATA_BUCKET,
source_object="dataproc",
destination_bucket=BUCKET_NAME,
destination_object="dataproc",
recursive=True,
)

# [START how_to_cloud_dataproc_create_cluster_generate_cluster_config_operator]
@@ -114,7 +122,7 @@
(
# TEST SETUP
create_bucket
>> upload_file
>> move_init_file
# TEST BODY
>> create_dataproc_cluster
# TEST TEARDOWN
(The remaining changed files are not shown in this view.)
