Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 21a3606

Browse files
authored Jun 8, 2020
docs(samples): add load CSV from GCS to overwrite table sample (#428)
1 parent 6104277 commit 21a3606

File tree

2 files changed

+171
-0
lines changed

2 files changed

+171
-0
lines changed
 
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.bigquery;
18+
19+
// [START bigquery_load_table_gcs_csv_truncate]
20+
import com.google.cloud.bigquery.BigQuery;
21+
import com.google.cloud.bigquery.BigQueryException;
22+
import com.google.cloud.bigquery.BigQueryOptions;
23+
import com.google.cloud.bigquery.FormatOptions;
24+
import com.google.cloud.bigquery.Job;
25+
import com.google.cloud.bigquery.JobInfo;
26+
import com.google.cloud.bigquery.JobInfo.WriteDisposition;
27+
import com.google.cloud.bigquery.LoadJobConfiguration;
28+
import com.google.cloud.bigquery.TableId;
29+
30+
// Sample to overwrite the BigQuery table data by loading a CSV file from GCS
31+
public class LoadCsvFromGcsTruncate {
32+
33+
public static void runLoadCsvFromGcsTruncate() throws Exception {
34+
// TODO(developer): Replace these variables before running the sample.
35+
String datasetName = "MY_DATASET_NAME";
36+
String tableName = "MY_TABLE_NAME";
37+
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv";
38+
loadCsvFromGcsTruncate(datasetName, tableName, sourceUri);
39+
}
40+
41+
public static void loadCsvFromGcsTruncate(String datasetName, String tableName, String sourceUri)
42+
throws Exception {
43+
try {
44+
// Initialize client that will be used to send requests. This client only needs to be created
45+
// once, and can be reused for multiple requests.
46+
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
47+
48+
TableId tableId = TableId.of(datasetName, tableName);
49+
50+
LoadJobConfiguration configuration =
51+
LoadJobConfiguration.builder(tableId, sourceUri)
52+
.setFormatOptions(FormatOptions.csv())
53+
// Set the write disposition to overwrite existing table data
54+
.setWriteDisposition(WriteDisposition.WRITE_TRUNCATE)
55+
.build();
56+
57+
// For more information on Job see:
58+
// https://googleapis.dev/java/google-cloud-clients/latest/index.html?com/google/cloud/bigquery/package-summary.html
59+
// Load the table
60+
Job loadJob = bigquery.create(JobInfo.of(configuration));
61+
62+
// Load data from a GCS parquet file into the table
63+
// Blocks until this load table job completes its execution, either failing or succeeding.
64+
Job completedJob = loadJob.waitFor();
65+
66+
// Check for errors
67+
if (completedJob == null) {
68+
throw new Exception("Job not executed since it no longer exists.");
69+
} else if (completedJob.getStatus().getError() != null) {
70+
// You can also look at queryJob.getStatus().getExecutionErrors() for all
71+
// errors, not just the latest one.
72+
throw new Exception(
73+
"BigQuery was unable to load into the table due to an error: \n"
74+
+ loadJob.getStatus().getError());
75+
}
76+
System.out.println("Table is successfully overwritten by CSV file loaded from GCS");
77+
} catch (BigQueryException | InterruptedException e) {
78+
System.out.println("Column not added during load append \n" + e.toString());
79+
}
80+
}
81+
}
82+
// [END bigquery_load_table_gcs_csv_truncate]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.bigquery;
18+
19+
import static com.google.common.truth.Truth.assertThat;
20+
import static junit.framework.TestCase.assertNotNull;
21+
22+
import com.google.cloud.bigquery.Field;
23+
import com.google.cloud.bigquery.LegacySQLTypeName;
24+
import com.google.cloud.bigquery.Schema;
25+
import java.io.ByteArrayOutputStream;
26+
import java.io.PrintStream;
27+
import java.util.UUID;
28+
import org.junit.After;
29+
import org.junit.Before;
30+
import org.junit.BeforeClass;
31+
import org.junit.Test;
32+
33+
public class LoadCsvFromGcsTruncateTest {
34+
35+
private String tableName;
36+
private ByteArrayOutputStream bout;
37+
private PrintStream out;
38+
39+
private static final String BIGQUERY_DATASET_NAME = requireEnvVar("BIGQUERY_DATASET_NAME");
40+
41+
private static String requireEnvVar(String varName) {
42+
String value = System.getenv(varName);
43+
assertNotNull(
44+
"Environment variable " + varName + " is required to perform these tests.",
45+
System.getenv(varName));
46+
return value;
47+
}
48+
49+
@BeforeClass
50+
public static void checkRequirements() {
51+
requireEnvVar("BIGQUERY_DATASET_NAME");
52+
}
53+
54+
@Before
55+
public void setUp() {
56+
bout = new ByteArrayOutputStream();
57+
out = new PrintStream(bout);
58+
System.setOut(out);
59+
60+
// Create a test table
61+
tableName = "loadCsvFromGcsTruncate_TEST_" + UUID.randomUUID().toString().replace('-', '_');
62+
63+
Schema schema =
64+
Schema.of(
65+
Field.of("name", LegacySQLTypeName.STRING),
66+
Field.of("post_abbr", LegacySQLTypeName.STRING));
67+
68+
CreateTable.createTable(BIGQUERY_DATASET_NAME, tableName, schema);
69+
70+
bout = new ByteArrayOutputStream();
71+
out = new PrintStream(bout);
72+
System.setOut(out);
73+
}
74+
75+
@After
76+
public void tearDown() {
77+
// Clean up
78+
DeleteTable.deleteTable(BIGQUERY_DATASET_NAME, tableName);
79+
System.setOut(null);
80+
}
81+
82+
@Test
83+
public void loadCsvFromGcsTruncate() throws Exception {
84+
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv";
85+
LoadCsvFromGcsTruncate.loadCsvFromGcsTruncate(BIGQUERY_DATASET_NAME, tableName, sourceUri);
86+
assertThat(bout.toString())
87+
.contains("Table is successfully overwritten by CSV file loaded from GCS");
88+
}
89+
}

0 commit comments

Comments
 (0)
Failed to load comments.