Skip to content

Commit

Permalink
docs(samples): add export query results to Amazon S3 sample and IT (#853)
Browse files Browse the repository at this point in the history

* docs(samples): add export query results to Amazon S3 sample and IT

* update based on comments
  • Loading branch information
stephaniewang526 authored Oct 23, 2020
1 parent 725c93f commit 5358620
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquery;

// [START bigquery_omni_export_query_result_to_s3]
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.TableResult;

// Sample to export query results to Amazon S3 bucket
public class ExportQueryResultsToS3 {

  // Builds an EXPORT DATA statement from placeholder values and runs it.
  public static void main(String[] args) throws InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "MY_PROJECT_ID";
    String datasetName = "MY_DATASET_NAME";
    String externalTableName = "MY_EXTERNAL_TABLE_NAME";
    // connectionName should be in the format of connection_region.connection_name. e.g.
    // aws-us-east-1.s3-write-conn
    String connectionName = "MY_CONNECTION_REGION.MY_CONNECTION_NAME";
    // destinationUri must contain exactly one * anywhere in the leaf directory of the path string
    // e.g. ../aa/*, ../aa/b*c, ../aa/*bc, and ../aa/bc*
    // BigQuery replaces * with 0000..N depending on the number of files exported.
    // BigQuery determines the file count and sizes.
    String destinationUri = "s3://your-bucket-name/*";
    // Export file format, e.g. CSV
    String format = "EXPORT_FORMAT";
    // Export result of query to find states starting with 'W'.
    // The template must have one %s per argument, in the same order as the argument list:
    // connection, uri, format, project, dataset, table. The connection name is back-quoted
    // because it contains '-' and '.', and '%%' yields a literal '%' for the LIKE pattern.
    String query =
        String.format(
            "EXPORT DATA WITH CONNECTION `%s` OPTIONS(uri='%s', format='%s') "
                + "AS SELECT * FROM %s.%s.%s WHERE name LIKE 'W%%'",
            connectionName, destinationUri, format, projectId, datasetName, externalTableName);
    exportQueryResultsToS3(query);
  }

  // Runs the given query with a default BigQuery client, prints every value of every returned
  // row followed by a comma, then prints a success message. A BigQueryException is caught and
  // reported on standard output instead of being propagated; InterruptedException is passed on
  // to the caller.
  public static void exportQueryResultsToS3(String query) throws InterruptedException {
    try {
      // Initialize client that will be used to send requests. This client only needs to be created
      // once, and can be reused for multiple requests.
      BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

      TableResult results = bigquery.query(QueryJobConfiguration.of(query));

      results
          .iterateAll()
          .forEach(row -> row.forEach(val -> System.out.printf("%s,", val.toString())));

      System.out.println("Query results exported to Amazon S3 successfully.");
    } catch (BigQueryException e) {
      System.out.println("Query not performed \n" + e.toString());
    }
  }
}
// [END bigquery_omni_export_query_result_to_s3]
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ public static void main(String[] args) {
for (Dataset dataset : datasets.getValues()) {
String datasetName = dataset.getDatasetId().getDataset();
if ((datasetName.contains("CREATE_DATASET_AWS_TEST_")
|| datasetName.contains("MY_DATASET_")
|| datasetName.contains("gcloud_test_")
|| datasetName.contains("SHARED_DATASET_TEST_"))
|| datasetName.contains("MY_DATASET_")
|| datasetName.contains("gcloud_test_")
|| datasetName.contains("SHARED_DATASET_TEST_"))
// && dataset.getCreationTime() > sixHourAgo
) {
System.out.format("\tDeleting Dataset: %s\n", datasetName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
public class CreateExternalTableAwsIT {

private static final String ID = UUID.randomUUID().toString().substring(0, 8);
private static final String LOCATION = "aws-us-east-1";
private final Logger log = Logger.getLogger(this.getClass().getName());
private String tableName;
private ByteArrayOutputStream bout;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquery;

import static com.google.common.truth.Truth.assertThat;
import static junit.framework.TestCase.assertNotNull;

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

// Integration test for the ExportQueryResultsToS3 sample. Standard output is captured for each
// test so that the message printed by the sample can be asserted on.
public class ExportQueryResultsToS3IT {
  private final Logger log = Logger.getLogger(this.getClass().getName());
  private ByteArrayOutputStream bout;
  private PrintStream out;
  private PrintStream originalPrintStream;

  private static final String OMNI_PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");
  private static final String OMNI_DATASET_NAME = requireEnvVar("OMNI_DATASET_NAME");
  private static final String OMNI_EXTERNAL_TABLE_NAME = requireEnvVar("OMNI_EXTERNAL_TABLE_NAME");
  private static final String AWS_WRITE_CONNECTION_ID = requireEnvVar("AWS_WRITE_CONNECTION_ID");

  // Returns the value of the named environment variable, failing the assertion with a
  // descriptive message when the variable is not set.
  private static String requireEnvVar(String varName) {
    String value = System.getenv(varName);
    // Assert on the value that is actually returned rather than looking it up a second time.
    assertNotNull(
        "Environment variable " + varName + " is required to perform these tests.", value);
    return value;
  }

  // Verifies that every environment variable used by this test class is present.
  @BeforeClass
  public static void checkRequirements() {
    requireEnvVar("OMNI_PROJECT_ID");
    requireEnvVar("OMNI_DATASET_NAME");
    requireEnvVar("OMNI_EXTERNAL_TABLE_NAME");
    requireEnvVar("AWS_WRITE_CONNECTION_ID");
  }

  // Redirects System.out into an in-memory buffer, remembering the original stream.
  @Before
  public void setUp() {
    bout = new ByteArrayOutputStream();
    out = new PrintStream(bout);
    originalPrintStream = System.out;
    System.setOut(out);
  }

  @After
  public void tearDown() {
    // restores print statements in the original method
    System.out.flush();
    System.setOut(originalPrintStream);
    // Log whatever the sample printed so it remains visible in the test output.
    log.log(Level.INFO, bout.toString());
  }

  // Builds an EXPORT DATA statement from the environment-provided identifiers, runs the sample
  // with it, and checks that the sample printed its success message.
  @Test
  public void testQueryExternalTableAws() throws InterruptedException {
    String destinationUri = "s3://omni-samples-test-bucket/client-lib-test*";
    String format = "CSV";
    String query =
        String.format(
            "EXPORT DATA WITH CONNECTION `%s` OPTIONS(uri='%s', format='%s') "
                + "AS SELECT * FROM %s.%s.%s WHERE name LIKE 'W%%'",
            AWS_WRITE_CONNECTION_ID,
            destinationUri,
            format,
            OMNI_PROJECT_ID,
            OMNI_DATASET_NAME,
            OMNI_EXTERNAL_TABLE_NAME);
    ExportQueryResultsToS3.exportQueryResultsToS3(query);
    assertThat(bout.toString()).contains("Query results exported to Amazon S3 successfully.");
  }
}

0 comments on commit 5358620

Please sign in to comment.