# Copyright 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Define API Jobs."""
import six
from gcloud.exceptions import NotFound
from gcloud._helpers import _datetime_from_microseconds
from gcloud.bigquery.dataset import Dataset
from gcloud.bigquery.table import SchemaField
from gcloud.bigquery.table import Table
from gcloud.bigquery.table import _build_schema_resource
from gcloud.bigquery.table import _parse_schema_resource
from gcloud.bigquery._helpers import _EnumProperty
from gcloud.bigquery._helpers import _TypedProperty
class Compression(_EnumProperty):
"""Pseudo-enum for ``compression`` properties."""
GZIP = 'GZIP'
NONE = 'NONE'
ALLOWED = (GZIP, NONE)
class CreateDisposition(_EnumProperty):
"""Pseudo-enum for ``create_disposition`` properties."""
CREATE_IF_NEEDED = 'CREATE_IF_NEEDED'
CREATE_NEVER = 'CREATE_NEVER'
ALLOWED = (CREATE_IF_NEEDED, CREATE_NEVER)
class DestinationFormat(_EnumProperty):
"""Pseudo-enum for ``destination_format`` properties."""
CSV = 'CSV'
NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON'
AVRO = 'AVRO'
ALLOWED = (CSV, NEWLINE_DELIMITED_JSON, AVRO)
class Encoding(_EnumProperty):
"""Pseudo-enum for ``encoding`` properties."""
    UTF_8 = 'UTF-8'
    ISO_8859_1 = 'ISO-8859-1'
    ALLOWED = (UTF_8, ISO_8859_1)
class QueryPriority(_EnumProperty):
"""Pseudo-enum for ``QueryJob.priority`` property."""
INTERACTIVE = 'INTERACTIVE'
BATCH = 'BATCH'
ALLOWED = (INTERACTIVE, BATCH)
class SourceFormat(_EnumProperty):
"""Pseudo-enum for ``source_format`` properties."""
CSV = 'CSV'
DATASTORE_BACKUP = 'DATASTORE_BACKUP'
NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON'
ALLOWED = (CSV, DATASTORE_BACKUP, NEWLINE_DELIMITED_JSON)
class WriteDisposition(_EnumProperty):
"""Pseudo-enum for ``write_disposition`` properties."""
WRITE_APPEND = 'WRITE_APPEND'
WRITE_TRUNCATE = 'WRITE_TRUNCATE'
WRITE_EMPTY = 'WRITE_EMPTY'
ALLOWED = (WRITE_APPEND, WRITE_TRUNCATE, WRITE_EMPTY)
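# Illustrative sketch (not part of this module): each pseudo-enum class above
# is a configuration-property descriptor, so assigning a value outside its
# ``ALLOWED`` tuple raises ``ValueError``. Assumes a job instance exposing a
# ``write_disposition`` property:
#
#   job.write_disposition = WriteDisposition.WRITE_TRUNCATE  # accepted
#   job.write_disposition = 'OVERWRITE'  # raises ValueError: not in ALLOWED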
class _BaseJob(object):
"""Base class for jobs.
:type client: :class:`gcloud.bigquery.client.Client`
:param client: A client which holds credentials and project configuration
                   for the job (which requires a project).
"""
def __init__(self, client):
self._client = client
self._properties = {}
@property
def project(self):
"""Project bound to the job.
:rtype: string
:returns: the project (derived from the client).
"""
return self._client.project
def _require_client(self, client):
"""Check client or verify over-ride.
:type client: :class:`gcloud.bigquery.client.Client` or ``NoneType``
:param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current job.
:rtype: :class:`gcloud.bigquery.client.Client`
:returns: The client passed in or the currently bound client.
"""
if client is None:
client = self._client
return client
class _AsyncJob(_BaseJob):
"""Base class for asynchronous jobs.
:type name: string
:param name: the name of the job
:type client: :class:`gcloud.bigquery.client.Client`
:param client: A client which holds credentials and project configuration
                   for the job (which requires a project).
"""
def __init__(self, name, client):
super(_AsyncJob, self).__init__(client)
self.name = name
@property
def job_type(self):
"""Type of job
:rtype: string
:returns: one of 'load', 'copy', 'extract', 'query'
"""
return self._JOB_TYPE
@property
def path(self):
"""URL path for the job's APIs.
:rtype: string
:returns: the path based on project and job name.
"""
return '/projects/%s/jobs/%s' % (self.project, self.name)
@property
def etag(self):
"""ETag for the job resource.
:rtype: string, or ``NoneType``
:returns: the ETag (None until set from the server).
"""
return self._properties.get('etag')
@property
def self_link(self):
"""URL for the job resource.
:rtype: string, or ``NoneType``
:returns: the URL (None until set from the server).
"""
return self._properties.get('selfLink')
@property
def user_email(self):
"""E-mail address of user who submitted the job.
:rtype: string, or ``NoneType``
        :returns: the e-mail address (None until set from the server).
"""
return self._properties.get('user_email')
@property
def created(self):
"""Datetime at which the job was created.
:rtype: ``datetime.datetime``, or ``NoneType``
:returns: the creation time (None until set from the server).
"""
statistics = self._properties.get('statistics')
if statistics is not None:
millis = statistics.get('creationTime')
if millis is not None:
return _datetime_from_microseconds(millis * 1000.0)
@property
def started(self):
"""Datetime at which the job was started.
:rtype: ``datetime.datetime``, or ``NoneType``
:returns: the start time (None until set from the server).
"""
statistics = self._properties.get('statistics')
if statistics is not None:
millis = statistics.get('startTime')
if millis is not None:
return _datetime_from_microseconds(millis * 1000.0)
@property
def ended(self):
"""Datetime at which the job finished.
:rtype: ``datetime.datetime``, or ``NoneType``
:returns: the end time (None until set from the server).
"""
statistics = self._properties.get('statistics')
if statistics is not None:
millis = statistics.get('endTime')
if millis is not None:
return _datetime_from_microseconds(millis * 1000.0)
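    # Note on the scaling above (value assumed for illustration): the API
    # reports 'creationTime' / 'startTime' / 'endTime' in epoch milliseconds,
    # while the shared helper ``_datetime_from_microseconds`` expects
    # microseconds, hence the ``millis * 1000.0``:
    #
    #   millis = 1466169595123                        # statistics value
    #   _datetime_from_microseconds(millis * 1000.0)  # -> UTC datetime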
@property
def error_result(self):
"""Error information about the job as a whole.
:rtype: mapping, or ``NoneType``
:returns: the error information (None until set from the server).
"""
status = self._properties.get('status')
if status is not None:
return status.get('errorResult')
@property
def errors(self):
"""Information about individual errors generated by the job.
:rtype: list of mappings, or ``NoneType``
:returns: the error information (None until set from the server).
"""
status = self._properties.get('status')
if status is not None:
return status.get('errors')
@property
def state(self):
"""Status of the job.
:rtype: string, or ``NoneType``
:returns: the state (None until set from the server).
"""
status = self._properties.get('status')
if status is not None:
return status.get('state')
def _scrub_local_properties(self, cleaned):
"""Helper: handle subclass properties in cleaned."""
pass
def _set_properties(self, api_response):
"""Update properties from resource in body of ``api_response``
:type api_response: httplib2.Response
:param api_response: response returned from an API call
"""
cleaned = api_response.copy()
self._scrub_local_properties(cleaned)
statistics = cleaned.get('statistics', {})
if 'creationTime' in statistics:
statistics['creationTime'] = float(statistics['creationTime'])
if 'startTime' in statistics:
statistics['startTime'] = float(statistics['startTime'])
if 'endTime' in statistics:
statistics['endTime'] = float(statistics['endTime'])
self._properties.clear()
self._properties.update(cleaned)
@classmethod
def _get_resource_config(cls, resource):
"""Helper for :meth:`from_api_repr`
:type resource: dict
:param resource: resource for the job
        :rtype: tuple
        :returns: tuple (string, dict), where the first element is the
                  job name and the second contains job-specific configuration.
:raises: :class:`KeyError` if the resource has no identifier, or
is missing the appropriate configuration.
"""
if ('jobReference' not in resource or
'jobId' not in resource['jobReference']):
raise KeyError('Resource lacks required identity information: '
'["jobReference"]["jobId"]')
name = resource['jobReference']['jobId']
if ('configuration' not in resource or
cls._JOB_TYPE not in resource['configuration']):
raise KeyError('Resource lacks required configuration: '
'["configuration"]["%s"]' % cls._JOB_TYPE)
config = resource['configuration'][cls._JOB_TYPE]
return name, config
def begin(self, client=None):
"""API call: begin the job via a POST request
See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs/insert
:type client: :class:`gcloud.bigquery.client.Client` or ``NoneType``
:param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current job.
"""
client = self._require_client(client)
path = '/projects/%s/jobs' % (self.project,)
api_response = client.connection.api_request(
method='POST', path=path, data=self._build_resource())
self._set_properties(api_response)
def exists(self, client=None):
"""API call: test for the existence of the job via a GET request
See
https://cloud.google.com/bigquery/docs/reference/v2/jobs/get
:type client: :class:`gcloud.bigquery.client.Client` or ``NoneType``
:param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current job.
"""
client = self._require_client(client)
try:
client.connection.api_request(method='GET', path=self.path,
query_params={'fields': 'id'})
except NotFound:
return False
else:
return True
def reload(self, client=None):
"""API call: refresh job properties via a GET request
See
https://cloud.google.com/bigquery/docs/reference/v2/jobs/get
:type client: :class:`gcloud.bigquery.client.Client` or ``NoneType``
:param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current job.
"""
client = self._require_client(client)
api_response = client.connection.api_request(
method='GET', path=self.path)
self._set_properties(api_response)
def cancel(self, client=None):
"""API call: cancel job via a POST request
See
https://cloud.google.com/bigquery/docs/reference/v2/jobs/cancel
:type client: :class:`gcloud.bigquery.client.Client` or ``NoneType``
:param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current job.
"""
client = self._require_client(client)
api_response = client.connection.api_request(
method='POST', path='%s/cancel' % (self.path,))
self._set_properties(api_response)
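# Illustrative lifecycle sketch (not part of this module; assumes a concrete
# job instance such as a ``LoadTableFromStorageJob`` and a configured client):
#
#   job.begin()                # POST: queue the job server-side
#   while job.state != 'DONE':
#       time.sleep(1)
#       job.reload()           # GET: refresh state and statistics
#   if job.error_result is not None:
#       raise RuntimeError(job.error_result)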
class _LoadConfiguration(object):
"""User-settable configuration options for load jobs.
Values which are ``None`` -> server defaults.
"""
_allow_jagged_rows = None
_allow_quoted_newlines = None
_create_disposition = None
_encoding = None
_field_delimiter = None
_ignore_unknown_values = None
_max_bad_records = None
_quote_character = None
_skip_leading_rows = None
_source_format = None
_write_disposition = None
class LoadTableFromStorageJob(_AsyncJob):
"""Asynchronous job for loading data into a table from CloudStorage.
:type name: string
:param name: the name of the job
:type destination: :class:`gcloud.bigquery.table.Table`
:param destination: Table into which data is to be loaded.
:type source_uris: sequence of string
:param source_uris: URIs of one or more data files to be loaded, in
format ``gs://<bucket_name>/<object_name_or_glob>``.
:type client: :class:`gcloud.bigquery.client.Client`
:param client: A client which holds credentials and project configuration
                   for the job (which requires a project).
:type schema: list of :class:`gcloud.bigquery.table.SchemaField`
:param schema: The job's schema
"""
_schema = None
_JOB_TYPE = 'load'
def __init__(self, name, destination, source_uris, client, schema=()):
super(LoadTableFromStorageJob, self).__init__(name, client)
self.destination = destination
self.source_uris = source_uris
# Let the @property do validation.
self.schema = schema
self._configuration = _LoadConfiguration()
@property
def schema(self):
"""Table's schema.
:rtype: list of :class:`SchemaField`
:returns: fields describing the schema
"""
return list(self._schema)
@schema.setter
def schema(self, value):
"""Update table's schema
:type value: list of :class:`SchemaField`
:param value: fields describing the schema
:raises: TypeError if 'value' is not a sequence, or ValueError if
any item in the sequence is not a SchemaField
"""
if not all(isinstance(field, SchemaField) for field in value):
raise ValueError('Schema items must be fields')
self._schema = tuple(value)
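    # Illustrative sketch of assigning a schema (field names assumed; uses
    # ``SchemaField`` imported above from ``gcloud.bigquery.table``):
    #
    #   job.schema = [
    #       SchemaField('full_name', 'STRING', mode='REQUIRED'),
    #       SchemaField('age', 'INTEGER', mode='REQUIRED'),
    #   ]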
@property
def input_file_bytes(self):
"""Count of bytes loaded from source files.
:rtype: integer, or ``NoneType``
:returns: the count (None until set from the server).
"""
statistics = self._properties.get('statistics')
if statistics is not None:
return int(statistics['load']['inputFileBytes'])
@property
def input_files(self):
"""Count of source files.
:rtype: integer, or ``NoneType``
:returns: the count (None until set from the server).
"""
statistics = self._properties.get('statistics')
if statistics is not None:
return int(statistics['load']['inputFiles'])
@property
def output_bytes(self):
"""Count of bytes saved to destination table.
:rtype: integer, or ``NoneType``
:returns: the count (None until set from the server).
"""
statistics = self._properties.get('statistics')
if statistics is not None:
return int(statistics['load']['outputBytes'])
@property
def output_rows(self):
"""Count of rows saved to destination table.
:rtype: integer, or ``NoneType``
:returns: the count (None until set from the server).
"""
statistics = self._properties.get('statistics')
if statistics is not None:
return int(statistics['load']['outputRows'])
allow_jagged_rows = _TypedProperty('allow_jagged_rows', bool)
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.allowJaggedRows
"""
allow_quoted_newlines = _TypedProperty('allow_quoted_newlines', bool)
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.allowQuotedNewlines
"""
create_disposition = CreateDisposition('create_disposition')
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.createDisposition
"""
encoding = Encoding('encoding')
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding
"""
field_delimiter = _TypedProperty('field_delimiter', six.string_types)
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.fieldDelimiter
"""
ignore_unknown_values = _TypedProperty('ignore_unknown_values', bool)
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.ignoreUnknownValues
"""
max_bad_records = _TypedProperty('max_bad_records', six.integer_types)
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.maxBadRecords
"""
quote_character = _TypedProperty('quote_character', six.string_types)
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.quote
"""
skip_leading_rows = _TypedProperty('skip_leading_rows', six.integer_types)
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.skipLeadingRows
"""
source_format = SourceFormat('source_format')
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.sourceFormat
"""
write_disposition = WriteDisposition('write_disposition')
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.writeDisposition
"""
def _populate_config_resource(self, configuration):
"""Helper for _build_resource: copy config properties to resource"""
if self.allow_jagged_rows is not None:
configuration['allowJaggedRows'] = self.allow_jagged_rows
if self.allow_quoted_newlines is not None:
configuration['allowQuotedNewlines'] = self.allow_quoted_newlines
if self.create_disposition is not None:
configuration['createDisposition'] = self.create_disposition
if self.encoding is not None:
configuration['encoding'] = self.encoding
if self.field_delimiter is not None:
configuration['fieldDelimiter'] = self.field_delimiter
if self.ignore_unknown_values is not None:
configuration['ignoreUnknownValues'] = self.ignore_unknown_values
if self.max_bad_records is not None:
configuration['maxBadRecords'] = self.max_bad_records
if self.quote_character is not None:
configuration['quote'] = self.quote_character
if self.skip_leading_rows is not None:
configuration['skipLeadingRows'] = self.skip_leading_rows
if self.source_format is not None:
configuration['sourceFormat'] = self.source_format
if self.write_disposition is not None:
configuration['writeDisposition'] = self.write_disposition
def _build_resource(self):
"""Generate a resource for :meth:`begin`."""
resource = {
'jobReference': {
'projectId': self.project,
'jobId': self.name,
},
'configuration': {
self._JOB_TYPE: {
'sourceUris': self.source_uris,
'destinationTable': {
'projectId': self.destination.project,
'datasetId': self.destination.dataset_name,
'tableId': self.destination.name,
},
},
},
}
configuration = resource['configuration'][self._JOB_TYPE]
self._populate_config_resource(configuration)
if len(self.schema) > 0:
configuration['schema'] = {
'fields': _build_schema_resource(self.schema)}
return resource
def _scrub_local_properties(self, cleaned):
"""Helper: handle subclass properties in cleaned."""
schema = cleaned.pop('schema', {'fields': ()})
self.schema = _parse_schema_resource(schema)
@classmethod
def from_api_repr(cls, resource, client):
"""Factory: construct a job given its API representation
        .. note::
           This method assumes that the project found in the resource matches
           the client's project.
        :type resource: dict
        :param resource: job resource representation returned from the API
        :type client: :class:`gcloud.bigquery.client.Client`
        :param client: Client which holds credentials and project
                       configuration for the job.
:rtype: :class:`gcloud.bigquery.job.LoadTableFromStorageJob`
:returns: Job parsed from ``resource``.
"""
name, config = cls._get_resource_config(resource)
dest_config = config['destinationTable']
dataset = Dataset(dest_config['datasetId'], client)
destination = Table(dest_config['tableId'], dataset)
        source_uris = config.get('sourceUris', ())
        job = cls(name, destination, source_uris, client=client)
job._set_properties(resource)
return job
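# Illustrative usage sketch for a load job (dataset, table, and URI names are
# assumed, not part of this module):
#
#   table = client.dataset('my_dataset').table('person_ages')
#   job = LoadTableFromStorageJob(
#       'load-job-1', table, ['gs://my-bucket/people-*.csv'], client,
#       schema=[SchemaField('full_name', 'STRING'),
#               SchemaField('age', 'INTEGER')])
#   job.source_format = SourceFormat.CSV
#   job.skip_leading_rows = 1
#   job.write_disposition = WriteDisposition.WRITE_TRUNCATE
#   job.begin()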
class _CopyConfiguration(object):
"""User-settable configuration options for copy jobs.
Values which are ``None`` -> server defaults.
"""
_create_disposition = None
_write_disposition = None
class CopyJob(_AsyncJob):
"""Asynchronous job: copy data into a table from other tables.
:type name: string
:param name: the name of the job
:type destination: :class:`gcloud.bigquery.table.Table`
:param destination: Table into which data is to be loaded.
:type sources: list of :class:`gcloud.bigquery.table.Table`
    :param sources: Tables from which data is to be copied.
:type client: :class:`gcloud.bigquery.client.Client`
:param client: A client which holds credentials and project configuration
                   for the job (which requires a project).
"""
_JOB_TYPE = 'copy'
def __init__(self, name, destination, sources, client):
super(CopyJob, self).__init__(name, client)
self.destination = destination
self.sources = sources
self._configuration = _CopyConfiguration()
create_disposition = CreateDisposition('create_disposition')
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy.createDisposition
"""
write_disposition = WriteDisposition('write_disposition')
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy.writeDisposition
"""
def _populate_config_resource(self, configuration):
"""Helper for _build_resource: copy config properties to resource"""
if self.create_disposition is not None:
configuration['createDisposition'] = self.create_disposition
if self.write_disposition is not None:
configuration['writeDisposition'] = self.write_disposition
def _build_resource(self):
"""Generate a resource for :meth:`begin`."""
source_refs = [{
'projectId': table.project,
'datasetId': table.dataset_name,
'tableId': table.name,
} for table in self.sources]
resource = {
'jobReference': {
'projectId': self.project,
'jobId': self.name,
},
'configuration': {
self._JOB_TYPE: {
'sourceTables': source_refs,
'destinationTable': {
'projectId': self.destination.project,
'datasetId': self.destination.dataset_name,
'tableId': self.destination.name,
},
},
},
}
configuration = resource['configuration'][self._JOB_TYPE]
self._populate_config_resource(configuration)
return resource
@classmethod
def from_api_repr(cls, resource, client):
"""Factory: construct a job given its API representation
        .. note::
           This method assumes that the project found in the resource matches
           the client's project.
        :type resource: dict
        :param resource: job resource representation returned from the API
        :type client: :class:`gcloud.bigquery.client.Client`
        :param client: Client which holds credentials and project
                       configuration for the job.
:rtype: :class:`gcloud.bigquery.job.CopyJob`
:returns: Job parsed from ``resource``.
"""
name, config = cls._get_resource_config(resource)
dest_config = config['destinationTable']
dataset = Dataset(dest_config['datasetId'], client)
destination = Table(dest_config['tableId'], dataset)
sources = []
for source_config in config['sourceTables']:
dataset = Dataset(source_config['datasetId'], client)
sources.append(Table(source_config['tableId'], dataset))
job = cls(name, destination, sources, client=client)
job._set_properties(resource)
return job
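# Illustrative usage sketch for a copy job (dataset and table names assumed):
#
#   source = client.dataset('my_dataset').table('person_ages')
#   destination = client.dataset('my_dataset').table('person_ages_backup')
#   job = CopyJob('copy-job-1', destination, [source], client)
#   job.create_disposition = CreateDisposition.CREATE_IF_NEEDED
#   job.write_disposition = WriteDisposition.WRITE_EMPTY
#   job.begin()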
class _ExtractConfiguration(object):
"""User-settable configuration options for extract jobs.
Values which are ``None`` -> server defaults.
"""
_compression = None
_destination_format = None
_field_delimiter = None
_print_header = None
class ExtractTableToStorageJob(_AsyncJob):
"""Asynchronous job: extract data from a table into Cloud Storage.
:type name: string
:param name: the name of the job
:type source: :class:`gcloud.bigquery.table.Table`
    :param source: Table from which data is to be extracted.
:type destination_uris: list of string
:param destination_uris: URIs describing Cloud Storage blobs into which
extracted data will be written, in format
``gs://<bucket_name>/<object_name_or_glob>``.
:type client: :class:`gcloud.bigquery.client.Client`
:param client: A client which holds credentials and project configuration
                   for the job (which requires a project).
"""
_JOB_TYPE = 'extract'
def __init__(self, name, source, destination_uris, client):
super(ExtractTableToStorageJob, self).__init__(name, client)
self.source = source
self.destination_uris = destination_uris
self._configuration = _ExtractConfiguration()
compression = Compression('compression')
"""See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extract.compression
"""
destination_format = DestinationFormat('destination_format')
"""See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extract.destinationFormat
"""
field_delimiter = _TypedProperty('field_delimiter', six.string_types)
"""See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extract.fieldDelimiter
"""
print_header = _TypedProperty('print_header', bool)
"""See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.extract.printHeader
"""
def _populate_config_resource(self, configuration):
"""Helper for _build_resource: copy config properties to resource"""
if self.compression is not None:
configuration['compression'] = self.compression
if self.destination_format is not None:
configuration['destinationFormat'] = self.destination_format
if self.field_delimiter is not None:
configuration['fieldDelimiter'] = self.field_delimiter
if self.print_header is not None:
configuration['printHeader'] = self.print_header
def _build_resource(self):
"""Generate a resource for :meth:`begin`."""
source_ref = {
'projectId': self.source.project,
'datasetId': self.source.dataset_name,
'tableId': self.source.name,
}
resource = {
'jobReference': {
'projectId': self.project,
'jobId': self.name,
},
'configuration': {
self._JOB_TYPE: {
'sourceTable': source_ref,
'destinationUris': self.destination_uris,
},
},
}
configuration = resource['configuration'][self._JOB_TYPE]
self._populate_config_resource(configuration)
return resource
@classmethod
def from_api_repr(cls, resource, client):
"""Factory: construct a job given its API representation
        .. note::
           This method assumes that the project found in the resource matches
           the client's project.
        :type resource: dict
        :param resource: job resource representation returned from the API
        :type client: :class:`gcloud.bigquery.client.Client`
        :param client: Client which holds credentials and project
                       configuration for the job.
:rtype: :class:`gcloud.bigquery.job.ExtractTableToStorageJob`
:returns: Job parsed from ``resource``.
"""
name, config = cls._get_resource_config(resource)
source_config = config['sourceTable']
dataset = Dataset(source_config['datasetId'], client)
source = Table(source_config['tableId'], dataset)
destination_uris = config['destinationUris']
job = cls(name, source, destination_uris, client=client)
job._set_properties(resource)
return job
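# Illustrative usage sketch for an extract job (names and URIs assumed):
#
#   source = client.dataset('my_dataset').table('person_ages')
#   job = ExtractTableToStorageJob(
#       'extract-job-1', source,
#       ['gs://my-bucket/exports/people-*.json'], client)
#   job.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON
#   job.compression = Compression.GZIP
#   job.begin()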
class _AsyncQueryConfiguration(object):
"""User-settable configuration options for asynchronous query jobs.
Values which are ``None`` -> server defaults.
"""
_allow_large_results = None
_create_disposition = None
_default_dataset = None
_destination = None
_flatten_results = None
_priority = None
_use_query_cache = None
_write_disposition = None
class QueryJob(_AsyncJob):
"""Asynchronous job: query tables.
:type name: string
:param name: the name of the job
:type query: string
:param query: SQL query string
:type client: :class:`gcloud.bigquery.client.Client`
:param client: A client which holds credentials and project configuration
                   for the job (which requires a project).
"""
_JOB_TYPE = 'query'
def __init__(self, name, query, client):
super(QueryJob, self).__init__(name, client)
self.query = query
self._configuration = _AsyncQueryConfiguration()
allow_large_results = _TypedProperty('allow_large_results', bool)
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.allowLargeResults
"""
create_disposition = CreateDisposition('create_disposition')
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.createDisposition
"""
default_dataset = _TypedProperty('default_dataset', Dataset)
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset
"""
destination = _TypedProperty('destination', Table)
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.destinationTable
"""
flatten_results = _TypedProperty('flatten_results', bool)
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.flattenResults
"""
priority = QueryPriority('priority')
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.priority
"""
use_query_cache = _TypedProperty('use_query_cache', bool)
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.useQueryCache
"""
write_disposition = WriteDisposition('write_disposition')
"""See:
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.writeDisposition
"""
def _destination_table_resource(self):
"""Create a JSON resource for the destination table.
Helper for :meth:`_populate_config_resource` and
:meth:`_scrub_local_properties`
"""
if self.destination is not None:
return {
'projectId': self.destination.project,
'datasetId': self.destination.dataset_name,
'tableId': self.destination.name,
}
def _populate_config_resource(self, configuration):
"""Helper for _build_resource: copy config properties to resource"""
if self.allow_large_results is not None:
configuration['allowLargeResults'] = self.allow_large_results
if self.create_disposition is not None:
configuration['createDisposition'] = self.create_disposition
if self.default_dataset is not None:
configuration['defaultDataset'] = {
'projectId': self.default_dataset.project,
'datasetId': self.default_dataset.name,
}
if self.destination is not None:
table_res = self._destination_table_resource()
configuration['destinationTable'] = table_res
if self.flatten_results is not None:
configuration['flattenResults'] = self.flatten_results
if self.priority is not None:
configuration['priority'] = self.priority
if self.use_query_cache is not None:
configuration['useQueryCache'] = self.use_query_cache
if self.write_disposition is not None:
configuration['writeDisposition'] = self.write_disposition
def _build_resource(self):
"""Generate a resource for :meth:`begin`."""
resource = {
'jobReference': {
'projectId': self.project,
'jobId': self.name,
},
'configuration': {
self._JOB_TYPE: {
'query': self.query,
},
},
}
configuration = resource['configuration'][self._JOB_TYPE]
self._populate_config_resource(configuration)
return resource
def _scrub_local_properties(self, cleaned):
"""Helper: handle subclass properties in cleaned.
        .. note::
           This method assumes that the project found in the resource matches
           the client's project.
"""
configuration = cleaned['configuration']['query']
dest_remote = configuration.get('destinationTable')
if dest_remote is None:
if self.destination is not None:
del self.destination
else:
dest_local = self._destination_table_resource()
if dest_remote != dest_local:
dataset = self._client.dataset(dest_remote['datasetId'])
self.destination = dataset.table(dest_remote['tableId'])
@classmethod
def from_api_repr(cls, resource, client):
"""Factory: construct a job given its API representation
:type resource: dict
        :param resource: job resource representation returned from the API
        :type client: :class:`gcloud.bigquery.client.Client`
        :param client: Client which holds credentials and project
                       configuration for the job.
        :rtype: :class:`gcloud.bigquery.job.QueryJob`
:returns: Job parsed from ``resource``.
"""
name, config = cls._get_resource_config(resource)
query = config['query']
job = cls(name, query, client=client)
job._set_properties(resource)
return job
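# Illustrative usage sketch for an asynchronous query job (dataset and table
# names assumed):
#
#   job = QueryJob(
#       'query-job-1',
#       'SELECT full_name, age FROM my_dataset.person_ages WHERE age > 21',
#       client)
#   job.destination = client.dataset('my_dataset').table('adults')
#   job.write_disposition = WriteDisposition.WRITE_TRUNCATE
#   job.use_query_cache = False
#   job.begin()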