feat(storage/transfermanager): prototype #10045

Merged
merged 15 commits into from
Jun 5, 2024
finish tests, some cleanup
BrennaEpp committed May 13, 2024
commit 9fcaca4e567d053fa677a4b36af580f47dd87b5f
1 change: 1 addition & 0 deletions go.work.sum
@@ -1,3 +1,4 @@
cloud.google.com/go/dataproc v1.12.0 h1:W47qHL3W4BPkAIbk4SWmIERwsWBaNnWm0P2sdx3YgGU=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.46.0/go.mod h1:V28hx+cUCZC9e3qcqszMb+Sbt8cQZtHTiXOmyDzoDOg=
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
83 changes: 83 additions & 0 deletions storage/transfermanager/doc.go
@@ -0,0 +1,83 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*
Package transfermanager provides an easy way to parallelize downloads in Google
Cloud Storage.

More information about Google Cloud Storage is available at
https://cloud.google.com/storage/docs.

See https://pkg.go.dev/cloud.google.com/go for authentication, timeouts,
connection pooling and similar aspects of this package.

NOTE: This package is in alpha. It is not stable, and is likely to change.

# Example usage

// Pass in any client opts or set retry policy here
client, err := storage.NewClient(ctx) // can also use NewGRPCClient
if err != nil {
// handle error
}

// Create Downloader with desired options, including number of workers,
// part size, per operation timeout, etc.
d, err := transfermanager.NewDownloader(client, transfermanager.WithWorkers(16))
if err != nil {
// handle error
}

// Create local file writer for output
f, err := os.Create("/path/to/localfile")
if err != nil {
// handle error
}

// Create download input
in := &transfermanager.DownloadObjectInput{
Bucket: "mybucket",
Object: "myblob",
Destination: f,
// Optionally specify params to apply to download.
EncryptionKey: []byte("mykey"),
}

// Can set timeout on this download using context. Note that this download
// may not start immediately if all workers are busy, so this may time out
// before the download even starts. To set a timeout that starts with the
// download, use transfermanager.WithPerOpTimeout(time.Duration).
	ctx, cancel := context.WithTimeout(ctx, 1*time.Minute)
defer cancel()

// Add to Downloader
d.DownloadObject(ctx, in)

// Repeat if desired

// Wait for all downloads to complete.
d.WaitAndClose()

// Iterate through completed downloads and process results. This can
// also happen async in a go routine as the downloads run.
results := d.Results()
for _, out := range results {
if out.Err != nil {
log.Printf("download of %v failed with error %v", out.Name, out.Err)
} else {
log.Printf("download of %v succeeded", out.Object)
}
}
*/
package transfermanager // import "cloud.google.com/go/storage/transfermanager"
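The package example above collects outputs through Results. For the callback path this PR also adds, here is a minimal sketch in the same style, assuming the same client, Downloader, and file setup as the example (error handling elided as in the doc example):

// Queue a download whose result is handled by a callback rather than Results.
in := &transfermanager.DownloadObjectInput{
	Bucket:      "mybucket",
	Object:      "myblob",
	Destination: f,
}

d.DownloadObjectWithCallback(ctx, in, func(out *transfermanager.DownloadOutput) {
	if out.Err != nil {
		log.Printf("download of %v failed: %v", out.Object, out.Err)
		return
	}
	log.Printf("download of %v succeeded", out.Object)
})

// WaitAndClose still blocks until callback-based downloads finish.
d.WaitAndClose()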
165 changes: 35 additions & 130 deletions storage/transfermanager/downloader.go
@@ -43,9 +43,7 @@ type Downloader struct {
// download but is non-blocking; call Downloader.Results to process the result.
func (d *Downloader) DownloadObject(ctx context.Context, input *DownloadObjectInput) {
input.ctx = ctx
d.inputsMu.Lock()
d.inputs = append(d.inputs, *input)
d.inputsMu.Unlock()
d.addInput(input)
}

// DownloadObjectWithCallback queues the download of a single object. This will initiate the
@@ -54,9 +52,7 @@ func (d *Downloader) DownloadObjectWithCallback(ctx context.Context, input *DownloadObjectIn
func (d *Downloader) DownloadObjectWithCallback(ctx context.Context, input *DownloadObjectInput, callback func(*DownloadOutput)) {
Contributor:

What is the expected behavior here if someone calls this without using the Callback option when creating a downloader? Or vice versa with DownloadObject? There are some corner cases to think through here.

I'm leaning towards just having one entry point function for DownloadObject and moving the callback to a field on the DownloadObjectInput -- what do you think?

Contributor Author:

I forgot to remove the callback Option - right now it's completely unused. We could enforce its usage (erroring in the output, or here, if the option does not match the call); as of now, callbacks and no callbacks can be used in the same downloader (and it should work, though it's untested).

I lean towards the two separate entry points. I think the distinction in behaviour is big enough that we should make a similar distinction in our surface - having it just as a field isn't as nice an experience for people using it, and it encourages more mixing of callbacks and no callbacks. If we do add it as a field, I'd suggest then also adding the output to the results returned by Results(), in addition to calling the callback if available. That way the callback is more of an optional field that gets called, and less of something that causes different behaviour.

Contributor Author:

As discussed offline, we'll enforce usage of the option and set callback as a field.

input.ctx = ctx
input.callback = &callback
d.inputsMu.Lock()
d.inputs = append(d.inputs, *input)
d.inputsMu.Unlock()
d.addInput(input)
}
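
The thread above settles on enforcing the callback option and carrying the callback as a field on DownloadObjectInput. A rough sketch of how the worker could then dispatch, with the exported Callback field being an assumption about the follow-up rather than code in this commit:

// Hypothetical follow-up: with a Callback field on DownloadObjectInput, a
// single DownloadObject entry point is enough and the worker branches on the
// field rather than on which method queued the download.
if input.Callback != nil {
	input.Callback(out) // callback downloads are not reported by Results
} else {
	d.addResult(out) // non-callback downloads are returned by Results
}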

// WaitAndClose waits for all outstanding downloads to complete. The Downloader
@@ -73,9 +69,18 @@ func (d *Downloader) WaitAndClose() error {
return nil
}

// Results returns the iterator for download outputs.
// Results returns all the results of the downloads completed since the last
Contributor:

I think we could probably enforce that this can only be called after the full job is complete (given that we are keeping WaitAndClose as a single func). Or we could even just return the slice from WaitAndClose. What do you think?

Contributor Author:

I was thinking of just returning it in WaitAndClose, but hesitated given the async case - seems cleaner in that case to not return the results in WaitAndClose (since presumably it'd be empty).

We could enforce that it only be called after the full job is complete by returning an error and/or an empty slice if it's called before WaitAndClose... but then it may just be cleaner to return the results in WaitAndClose. The way it is now should not cause any error if users call it several times or before WaitAndClose, but it could be confusing for some users. We could also always return the whole slice, but that has the same problems.

I think that, if we weigh async and sync equally (and we don't want users grabbing results partway through), returning results in WaitAndClose is better, especially if we can assume people using async are more aware of what they are doing and reading the docs, which would mention that results are empty in their case.

Contributor Author:

As discussed, I'll change this to return results directly in WaitAndClose

// time it was called. Call WaitAndClose before calling Results to wait for all
// downloads to complete.
// Results will not return results for downloads initiated with a callback.
func (d *Downloader) Results() []DownloadOutput {
return d.results
d.resultsMu.Lock()
r := make([]DownloadOutput, len(d.results))
copy(r, d.results)
d.results = []DownloadOutput{}
d.resultsMu.Unlock()

return r
}
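
The thread above concludes that a follow-up will return results directly from WaitAndClose. A caller-side sketch under that revised signature; this commit still returns only an error from WaitAndClose, so the code below is an assumption about the follow-up:

// Hypothetical revised usage: results come back from WaitAndClose directly.
// Downloads initiated with a callback would not appear in the returned slice.
results, err := d.WaitAndClose()
if err != nil {
	log.Printf("some downloads failed: %v", err)
}
for _, out := range results {
	if out.Err != nil {
		log.Printf("download of %q failed: %v", out.Object, out.Err)
		continue
	}
	log.Printf("downloaded %q (%d bytes)", out.Object, out.Attrs.Size)
}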

// sendInputsToWorkChan listens continuously to the inputs slice until d.done.
@@ -112,6 +117,24 @@ func (d *Downloader) drainInput() {
}
}

func (d *Downloader) addInput(input *DownloadObjectInput) {
d.inputsMu.Lock()
d.inputs = append(d.inputs, *input)
d.inputsMu.Unlock()
}

func (d *Downloader) addResult(result *DownloadOutput) {
d.resultsMu.Lock()
d.results = append(d.results, *result)
d.resultsMu.Unlock()
}

func (d *Downloader) error(err error) {
d.errorsMu.Lock()
d.errors = append(d.errors, err)
d.errorsMu.Unlock()
}
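
These helpers exist because the worker goroutines append to the shared inputs, results, and errors slices concurrently, and appending to a shared slice without a lock is a data race. A small self-contained sketch of the same pattern; the names here are generic and not part of the package:

package main

import (
	"fmt"
	"sync"
)

// collector mirrors the Downloader's pattern: a mutex guards a slice that
// many goroutines append to.
type collector struct {
	mu      sync.Mutex
	results []string
}

func (c *collector) add(r string) {
	c.mu.Lock()
	c.results = append(c.results, r)
	c.mu.Unlock()
}

func main() {
	var c collector
	var wg sync.WaitGroup
	for i := 0; i < 16; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			c.add(fmt.Sprintf("result-%d", i)) // safe: append is guarded by c.mu
		}(i)
	}
	wg.Wait()
	fmt.Println(len(c.results)) // always 16; without the mutex this would race
}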

// downloadWorker continuously processes downloads until the work channel is closed.
func (d *Downloader) downloadWorker() {
for {
@@ -128,18 +151,14 @@ func (d *Downloader) downloadWorker() {

// Keep track of any error that occurred.
if out.Err != nil {
d.errorsMu.Lock()
d.errors = append(d.errors, out.Err)
d.errorsMu.Unlock()
d.error(out.Err)
Contributor:

Can we wrap this error to include the bucket/object name? I don't see that added in downloadShard either.

Contributor Author:

Sure! Technically they would have the information since it's in the output, but I see no harm including it here as well.

Contributor:

Sounds good; yeah if it's in the top-level output then you can see which individual result to inspect as well.

}

// Either execute the callback, or append to results.
if input.callback != nil {
(*input.callback)(out)
} else {
d.resultsMu.Lock()
d.results = append(d.results, *out)
d.resultsMu.Unlock()
d.addResult(out)
}
}
d.workers.Done()
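
The review thread above asks for the recorded error to identify the failing object. A minimal sketch of that wrapping using the error helper added in this commit; the message format is illustrative and fmt is assumed to be imported:

// Sketch: include bucket and object in the recorded error so the aggregate
// error from WaitAndClose points at the failing download.
if out.Err != nil {
	d.error(fmt.Errorf("transfermanager: downloading %q from bucket %q: %w",
		input.Object, input.Bucket, out.Err))
}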
@@ -204,6 +223,7 @@ type DownloadObjectInput struct {
}

// downloadShard will read a specific object into in.Destination.
// If timeout is less than 0, no timeout is set.
// TODO: download a single shard instead of the entire object.
func (in *DownloadObjectInput) downloadShard(client *storage.Client, timeout time.Duration) (out *DownloadOutput) {
out = &DownloadOutput{Bucket: in.Bucket, Object: in.Object}
@@ -220,7 +240,7 @@ func (in *DownloadObjectInput) downloadShard(client *storage.Client, timeout tim
o := client.Bucket(in.Bucket).Object(in.Object)

if in.Conditions != nil {
o.If(*in.Conditions)
o = o.If(*in.Conditions)
}
if in.Generation != nil {
o = o.Generation(*in.Generation)
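
The one-line fix above matters because ObjectHandle methods such as If and Generation return a derived handle rather than mutating the receiver, so discarding the return value silently drops the condition. A short illustration of the pattern, in the style of the package doc example; bucket, object, and generation values are placeholders:

o := client.Bucket("my-bucket").Object("my-object")

// o.If(conds) on its own is a no-op: If returns a new handle and does not
// modify o. Keep the returned handle so the condition applies to the read.
o = o.If(storage.Conditions{GenerationMatch: 123})
o = o.Generation(123)

r, err := o.NewReader(ctx)
if err != nil {
	// handle error
}
defer r.Close()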
@@ -269,118 +289,3 @@ type DownloadOutput struct {
Err error // error occurring during download
Attrs *storage.ReaderObjectAttrs // attributes of downloaded object, if successful
}

// // DownloadOutputIterator allows the end user to iterate through completed
// // object downloads.
// type DownloadOutputIterator struct {
// output <-chan *DownloadOutput
// }

// // Next iterates through results. When complete, will return the iterator.Done
// // error. It is considered complete once WaitAndClose() has been called on the
// // Downloader.
// // Note that if there was an error reading an object, it will not be returned
// // by Next - check DownloadOutput.Err instead.
// // DownloadOutputs will be available as the downloads complete; they can
// // be iterated through asynchronously or at the end of the job.
// // Next will block if there are no more completed downloads (and the Downloader
// // is not closed).
// func (it *DownloadOutputIterator) Next() (*DownloadOutput, error) {
// out, ok := <-it.output
// if !ok {
// return nil, iterator.Done
// }
// fmt.Println(out)
// return out, nil
// }

// // listenForAndDelegateWork will receive from the work chan, and start goroutines
// // to execute that work without blocking.
// // It should be called only once.
// func (d *Downloader) listenForAndDelegateWork1() {
// for {
// // Dequeue the work. Can block.
// input, ok := <-d.work
// if !ok {
// break // no more work; exit
// }

// // Start a worker. This may block.
// d.workerGroup.Go(func() error {
// // Do the download.
// // TODO: break down the input into smaller pieces if necessary; maybe as follows:
// // Only request partSize data to begin with. If no error and we haven't finished
// // reading the object, enqueue the remaining pieces of work (by sending them to d.work)
// // and mark in the out var the amount of shards to wait for.
// out := input.downloadShard(d.client, d.config.perOperationTimeout)

// // Send the output to be received by Next. This could block until received
// // (ie. the user calls Next), but it's okay; our worker has already returned.
// // Alternatively, we could feed these to a slice that we grab from in Next.
// // This would not block then, but would require synchronization of the slice.
// go func() {
// out := out
// d.output <- out
// }()

// // We return the error here, to communicate to d.workergroup.Wait
// // that there has been an error.
// // Since the group does not use a shared context, this should not
// // affect any of the other operations using the group.
// // TO-DO: in addition to this, if we want WaitAndClose to return a
// // multi err, we will need to record these errors somewhere.
// // Where-ever we record those, it will need synchronization since
// // this is concurrent.
// return out.Err
// })

// }
// }

// // drainInput consumes everything in the inputs slice and dispatches workers.
// // It will block if there are not enough workers to consume every input, until
// // all inputs are dispatched to an available worker.
// func (d *Downloader) drainInpu1t() {
// fmt.Println(len(d.inputs))

// for len(d.inputs) > 0 {
// d.inputsMu.Lock()
// if len(d.inputs) < 1 {
// return
// }
// input := d.inputs[0]
// d.inputs = d.inputs[1:]
// d.inputsMu.Unlock()

// // Start a worker. This may block, but only if there aren't enough workers.
// d.workerGroup.Go(func() error {
// // Do the download.
// // TODO: break down the input into smaller pieces if necessary; maybe as follows:
// // Only request partSize data to begin with. If no error and we haven't finished
// // reading the object, enqueue the remaining pieces of work
// // and mark in the out var the amount of shards to wait for.
// out := input.downloadShard(d.client, d.config.perOperationTimeout)

// // Either return the callback, or append to results.
// if input.callback != nil {
// (*input.callback)(out)
// } else {
// d.resultsMu.Lock()
// d.results = append(d.results, *out)
// fmt.Println(len(d.results))

// d.resultsMu.Unlock()
// }

// // We return the error here, to communicate to d.workergroup.Wait
// // that there has been an error.
// // Since the group does not use a shared context, this should not
// // affect any of the other operations using the group.
// // TO-DO: in addition to this, if we want WaitAndClose to return a
// // multi err, we will need to record these errors somewhere.
// // Where-ever we record those, it will need synchronization since
// // this is concurrent.
// return out.Err
// })
// }
// }