| // Copyright 2016 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package bigquery |
| |
| import ( |
| "context" |
| "time" |
| |
| "cloud.google.com/go/internal/trace" |
| bq "google.golang.org/api/bigquery/v2" |
| ) |
| |
| // ExtractConfig holds the configuration for an extract job. |
| type ExtractConfig struct { |
| // Src is the table from which data will be extracted. |
| // Only one of Src or SrcModel should be specified. |
| Src *Table |
| |
| // SrcModel is the ML model from which the data will be extracted. |
| // Only one of Src or SrcModel should be specified. |
| SrcModel *Model |
| |
| // Dst is the destination into which the data will be extracted. |
| Dst *GCSReference |
| |
| // DisableHeader disables the printing of a header row in exported data. |
| DisableHeader bool |
| |
| // The labels associated with this job. |
| Labels map[string]string |
| |
| // For Avro-based extracts, controls whether logical type annotations are generated. |
| // |
| // Example: With this enabled, writing a BigQuery TIMESTAMP column will result in |
| // an integer column annotated with the appropriate timestamp-micros/millis annotation |
| // in the resulting Avro files. |
| UseAvroLogicalTypes bool |
| |
| // Sets a best-effort deadline on a specific job. If job execution exceeds this |
| // timeout, BigQuery may attempt to cancel this work automatically. |
| // |
| // This deadline cannot be adjusted or removed once the job is created. Consider |
| // using Job.Cancel in situations where you need more dynamic behavior. |
| // |
| // Experimental: this option is experimental and may be modified or removed in future versions, |
| // regardless of any other documented package stability guarantees. |
| JobTimeout time.Duration |
| } |
| |
| func (e *ExtractConfig) toBQ() *bq.JobConfiguration { |
| var printHeader *bool |
| if e.DisableHeader { |
| f := false |
| printHeader = &f |
| } |
| cfg := &bq.JobConfiguration{ |
| Labels: e.Labels, |
| Extract: &bq.JobConfigurationExtract{ |
| DestinationUris: append([]string{}, e.Dst.URIs...), |
| Compression: string(e.Dst.Compression), |
| DestinationFormat: string(e.Dst.DestinationFormat), |
| FieldDelimiter: e.Dst.FieldDelimiter, |
| |
| PrintHeader: printHeader, |
| UseAvroLogicalTypes: e.UseAvroLogicalTypes, |
| }, |
| JobTimeoutMs: e.JobTimeout.Milliseconds(), |
| } |
| if e.Src != nil { |
| cfg.Extract.SourceTable = e.Src.toBQ() |
| } |
| if e.SrcModel != nil { |
| cfg.Extract.SourceModel = e.SrcModel.toBQ() |
| } |
| return cfg |
| } |
| |
| func bqToExtractConfig(q *bq.JobConfiguration, c *Client) *ExtractConfig { |
| qe := q.Extract |
| return &ExtractConfig{ |
| Labels: q.Labels, |
| Dst: &GCSReference{ |
| URIs: qe.DestinationUris, |
| Compression: Compression(qe.Compression), |
| DestinationFormat: DataFormat(qe.DestinationFormat), |
| FileConfig: FileConfig{ |
| CSVOptions: CSVOptions{ |
| FieldDelimiter: qe.FieldDelimiter, |
| }, |
| }, |
| }, |
| DisableHeader: qe.PrintHeader != nil && !*qe.PrintHeader, |
| Src: bqToTable(qe.SourceTable, c), |
| SrcModel: bqToModel(qe.SourceModel, c), |
| UseAvroLogicalTypes: qe.UseAvroLogicalTypes, |
| JobTimeout: time.Duration(q.JobTimeoutMs) * time.Millisecond, |
| } |
| } |
| |
| // An Extractor extracts data from a BigQuery table into Google Cloud Storage. |
| type Extractor struct { |
| JobIDConfig |
| ExtractConfig |
| c *Client |
| } |
| |
| // ExtractorTo returns an Extractor which can be used to extract data from a |
| // BigQuery table into Google Cloud Storage. |
| // The returned Extractor may optionally be further configured before its Run method is called. |
| func (t *Table) ExtractorTo(dst *GCSReference) *Extractor { |
| return &Extractor{ |
| c: t.c, |
| ExtractConfig: ExtractConfig{ |
| Src: t, |
| Dst: dst, |
| }, |
| } |
| } |
| |
| // ExtractorTo returns an Extractor which can be persist a BigQuery Model into |
| // Google Cloud Storage. |
| // The returned Extractor may be further configured before its Run method is called. |
| func (m *Model) ExtractorTo(dst *GCSReference) *Extractor { |
| return &Extractor{ |
| c: m.c, |
| ExtractConfig: ExtractConfig{ |
| SrcModel: m, |
| Dst: dst, |
| }, |
| } |
| } |
| |
| // Run initiates an extract job. |
| func (e *Extractor) Run(ctx context.Context) (j *Job, err error) { |
| ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Extractor.Run") |
| defer func() { trace.EndSpan(ctx, err) }() |
| |
| return e.c.insertJob(ctx, e.newJob(), nil) |
| } |
| |
| func (e *Extractor) newJob() *bq.Job { |
| return &bq.Job{ |
| JobReference: e.JobIDConfig.createJobRef(e.c), |
| Configuration: e.ExtractConfig.toBQ(), |
| } |
| } |