Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 497ddf4

Browse files
author
Praful Makani
authored Aug 7, 2020
docs(samples): add load csv autodetect data from gcs (#655)
1 parent 319b982 commit 497ddf4

File tree

2 files changed

+155
-0
lines changed

2 files changed

+155
-0
lines changed
 
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.bigquery;
18+
19+
// [START bigquery_load_table_gcs_csv_autodetect]
20+
import com.google.cloud.bigquery.BigQuery;
21+
import com.google.cloud.bigquery.BigQueryException;
22+
import com.google.cloud.bigquery.BigQueryOptions;
23+
import com.google.cloud.bigquery.CsvOptions;
24+
import com.google.cloud.bigquery.Job;
25+
import com.google.cloud.bigquery.JobInfo;
26+
import com.google.cloud.bigquery.LoadJobConfiguration;
27+
import com.google.cloud.bigquery.TableId;
28+
29+
// Sample to load CSV data with autodetect schema from Cloud Storage into a new BigQuery table
30+
public class LoadCsvFromGCSAutodetect {
31+
32+
public static void runLoadCsvFromGCSAutodetect() {
33+
// TODO(developer): Replace these variables before running the sample.
34+
String datasetName = "MY_DATASET_NAME";
35+
String tableName = "MY_TABLE_NAME";
36+
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv";
37+
loadCsvFromGCSAutodetect(datasetName, tableName, sourceUri);
38+
}
39+
40+
public static void loadCsvFromGCSAutodetect(
41+
String datasetName, String tableName, String sourceUri) {
42+
try {
43+
// Initialize client that will be used to send requests. This client only needs to be created
44+
// once, and can be reused for multiple requests.
45+
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
46+
47+
TableId tableId = TableId.of(datasetName, tableName);
48+
49+
// Skip header row in the file.
50+
CsvOptions csvOptions = CsvOptions.newBuilder().setSkipLeadingRows(1).build();
51+
52+
LoadJobConfiguration loadConfig =
53+
LoadJobConfiguration.newBuilder(tableId, sourceUri)
54+
.setFormatOptions(csvOptions)
55+
.setAutodetect(true)
56+
.build();
57+
58+
// Load data from a GCS CSV file into the table
59+
Job job = bigquery.create(JobInfo.of(loadConfig));
60+
// Blocks until this load table job completes its execution, either failing or succeeding.
61+
job = job.waitFor();
62+
if (job.isDone() && job.getStatus().getError() == null) {
63+
System.out.println("CSV Autodetect from GCS successfully loaded in a table");
64+
} else {
65+
System.out.println(
66+
"BigQuery was unable to load into the table due to an error:"
67+
+ job.getStatus().getError());
68+
}
69+
} catch (BigQueryException | InterruptedException e) {
70+
System.out.println("Column not added during load append \n" + e.toString());
71+
}
72+
}
73+
}
74+
// [END bigquery_load_table_gcs_csv_autodetect]
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,81 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.bigquery;
18+
19+
import static com.google.common.truth.Truth.assertThat;
20+
import static junit.framework.TestCase.assertNotNull;
21+
22+
import com.google.cloud.bigquery.Schema;
23+
import java.io.ByteArrayOutputStream;
24+
import java.io.PrintStream;
25+
import java.util.UUID;
26+
import org.junit.After;
27+
import org.junit.Before;
28+
import org.junit.BeforeClass;
29+
import org.junit.Test;
30+
31+
public class LoadCsvFromGCSAutodetectIT {
32+
33+
private String tableName;
34+
private ByteArrayOutputStream bout;
35+
private PrintStream out;
36+
37+
private static final String BIGQUERY_DATASET_NAME = requireEnvVar("BIGQUERY_DATASET_NAME");
38+
39+
private static String requireEnvVar(String varName) {
40+
String value = System.getenv(varName);
41+
assertNotNull(
42+
"Environment variable " + varName + " is required to perform these tests.",
43+
System.getenv(varName));
44+
return value;
45+
}
46+
47+
@BeforeClass
48+
public static void checkRequirements() {
49+
requireEnvVar("BIGQUERY_DATASET_NAME");
50+
}
51+
52+
@Before
53+
public void setUp() {
54+
bout = new ByteArrayOutputStream();
55+
out = new PrintStream(bout);
56+
System.setOut(out);
57+
58+
// Create a test table
59+
tableName =
60+
"LOAD_CSV_TABLE_AUTODETECT_FROM_GCS_TEST_" + UUID.randomUUID().toString().substring(0, 8);
61+
CreateTable.createTable(BIGQUERY_DATASET_NAME, tableName, Schema.of());
62+
63+
bout = new ByteArrayOutputStream();
64+
out = new PrintStream(bout);
65+
System.setOut(out);
66+
}
67+
68+
@After
69+
public void tearDown() {
70+
// Clean up
71+
DeleteTable.deleteTable(BIGQUERY_DATASET_NAME, tableName);
72+
System.setOut(null);
73+
}
74+
75+
@Test
76+
public void testLoadCsvFromGCSAutodetect() {
77+
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv";
78+
LoadCsvFromGCSAutodetect.loadCsvFromGCSAutodetect(BIGQUERY_DATASET_NAME, tableName, sourceUri);
79+
assertThat(bout.toString()).contains("CSV Autodetect from GCS successfully loaded in a table");
80+
}
81+
}

0 commit comments

Comments
 (0)
Failed to load comments.