user3435964
user3435964

Reputation: 949

Recursively upload a directory contents to GCS using GO SDK

I am trying to upload and download a directory to GCS which contains a large amount of data. Can someone point me to how to achieve this using the Golang SDK? I would like to do a multipart/parallel upload as well (-m)

Sample Directory structure:

$ tree dir1/
dir1/
└── dir2
    └── dir3
        ├── 1.csv
        └── 2.csv

Equivalent gsutil command: gsutil -m cp -r dir1 gs://example/

Upvotes: 0

Views: 2000

Answers (1)

Grizzle
Grizzle

Reputation: 568

By my understanding, I believe the GCP storage API is atomic and only allows for a single object upload at a time.

The gsutil command uses recursion to iterate through the provided path but ultimately does singular API calls for uploading.

The snippet below will give you some hints; please note I haven't tested the actual copying to GCP. I have only verified that the code compiles.

It's a modified version of what Google have in their docs Uploading Objects

package main

import (
    "context"
    "fmt"
    "io"
    "os"
    "path/filepath"
    "strings"
    "time"

    "cloud.google.com/go/storage"
)

// BulkUpload uploads every regular file found under rootPath to the given
// GCS bucket, one object per file. Object names mirror the local layout
// relative to rootPath. Uploads are sequential; the first failure aborts
// the run and its error is returned.
func BulkUpload(bucket, rootPath string) error {
	ctx := context.Background()
	client, err := storage.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("storage.NewClient: %v", err)
	}
	defer client.Close()

	fileList, objPath, err := pathWalk(rootPath)
	if err != nil {
		return err
	}

	for i := range fileList {
		// Per-file helper so its defers (file close, context cancel)
		// run at the end of each iteration, not at function return.
		if err := uploadFile(ctx, client, bucket, fileList[i], objPath[i]); err != nil {
			return err
		}
		fmt.Printf("Blob %v uploaded.\n", objPath[i])
	}

	return nil
}

// uploadFile streams a single local file at path into bucket under the
// object name objName, with a 50s upload timeout.
func uploadFile(ctx context.Context, client *storage.Client, bucket, path, objName string) error {
	f, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("os.Open: %v", err)
	}
	// Keep the file open until the copy finishes (the original closed it
	// before io.Copy, so every upload read from a closed file).
	defer f.Close()

	ctx, cancel := context.WithTimeout(ctx, 50*time.Second)
	defer cancel()

	// Upload with storage.Writer; the object is only committed when
	// Close returns nil.
	wc := client.Bucket(bucket).Object(objName).NewWriter(ctx)
	if _, err := io.Copy(wc, f); err != nil {
		wc.Close() // best-effort cleanup; the copy error is the one to report
		return fmt.Errorf("io.Copy: %v", err)
	}
	if err := wc.Close(); err != nil {
		return fmt.Errorf("Writer.Close: %v", err)
	}
	return nil
}


// pathWalk create a list of files from a root path
// returns a list of all files and a list of files excluding the root path
func pathWalk(root string) ([]string, []string, error){
    var files []string
    var objectPath []string

    err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
        files = append(files, path)
        return nil
    })
    if err != nil {
        return nil, nil, fmt.Errorf("error walking path: %v", err)
    }

    for _, f := range files {
        objectPath = append(objectPath, strings.Trim(f, root))
    }

    return files, objectPath, nil
}

// main kicks off a bulk upload of a local directory into a GCS bucket and
// aborts the process on any failure.
func main() {
	err := BulkUpload("mybucket", "somedirectory/")
	if err != nil {
		panic(err)
	}
}

Upvotes: 1

Related Questions