# Copyright 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Define API Queries."""

import six

from gcloud.bigquery._helpers import _TypedProperty
from gcloud.bigquery._helpers import _rows_from_json
from gcloud.bigquery.dataset import Dataset
from gcloud.bigquery.job import QueryJob
from gcloud.bigquery.table import _parse_schema_resource


def _int_or_none(value):
    """Coerce a server-supplied count to ``int``, passing ``None`` through.

    The REST API encodes 64-bit counters (``totalRows``,
    ``totalBytesProcessed``) as JSON strings; callers of this module are
    promised integers.

    :type value: string, integer, or ``NoneType``
    :param value: raw value taken from an API response.

    :rtype: integer, or ``NoneType``
    :returns: ``int(value)``, or ``None`` if ``value`` is ``None``.
    """
    if value is None:
        return None
    return int(value)


class _SyncQueryConfiguration(object):
    """User-settable configuration options for synchronous query jobs.

    Values which are ``None`` -> server defaults.
    """
    _default_dataset = None
    _dry_run = None
    _max_results = None
    _timeout_ms = None
    _preserve_nulls = None
    _use_query_cache = None


class QueryResults(object):
    """Synchronous job:  query tables.

    :type query: string
    :param query: SQL query string

    :type client: :class:`gcloud.bigquery.client.Client`
    :param client: A client which holds credentials and project configuration
                   for the dataset (which requires a project).
    """
    def __init__(self, query, client):
        self._client = client
        # Raw resource most recently returned by the server.
        self._properties = {}
        self.query = query
        # Backing store for the ``_TypedProperty`` descriptors declared below.
        self._configuration = _SyncQueryConfiguration()
        # Lazily-created ``QueryJob``; see the ``job`` property.
        self._job = None

    @property
    def project(self):
        """Project bound to the job.

        :rtype: string
        :returns: the project (derived from the client).
        """
        return self._client.project

    def _require_client(self, client):
        """Check client or verify over-ride.

        :type client: :class:`gcloud.bigquery.client.Client` or ``NoneType``
        :param client: the client to use.  If not passed, falls back to the
                       ``client`` stored on the current query.

        :rtype: :class:`gcloud.bigquery.client.Client`
        :returns: The client passed in or the currently bound client.
        """
        if client is None:
            client = self._client
        return client

    @property
    def cache_hit(self):
        """Query results served from cache.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#cacheHit

        :rtype: boolean or ``NoneType``
        :returns: True if the query results were served from cache (None
                  until set by the server).
        """
        return self._properties.get('cacheHit')

    @property
    def complete(self):
        """Server completed query.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#jobComplete

        :rtype: boolean or ``NoneType``
        :returns: True if the query completed on the server (None
                  until set by the server).
        """
        return self._properties.get('jobComplete')

    @property
    def errors(self):
        """Errors generated by the query.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#errors

        :rtype: list of mapping, or ``NoneType``
        :returns: Mappings describing errors generated on the server (None
                  until set by the server).
        """
        return self._properties.get('errors')

    @property
    def name(self):
        """Job name, generated by the back-end.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#jobReference

        :rtype: string, or ``NoneType``
        :returns: The ``jobId`` taken from the ``jobReference`` property
                  (None until set by the server).
        """
        return self._properties.get('jobReference', {}).get('jobId')

    @property
    def job(self):
        """Job instance used to run the query.

        :rtype: :class:`gcloud.bigquery.job.QueryJob`, or ``NoneType``
        :returns: Job instance used to run the query (None until
                  ``jobReference`` property is set by the server).
        """
        if self._job is None:
            job_ref = self._properties.get('jobReference')
            if job_ref is not None:
                # Created once and cached for subsequent accesses.
                self._job = QueryJob(job_ref['jobId'], self.query,
                                     self._client)
        return self._job

    @property
    def page_token(self):
        """Token for fetching next batch of results.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#pageToken

        :rtype: string, or ``NoneType``
        :returns: Token generated on the server (None until set by the server).
        """
        return self._properties.get('pageToken')

    @property
    def total_rows(self):
        """Total number of rows returned by the query

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#totalRows

        :rtype: integer, or ``NoneType``
        :returns: Count generated on the server (None until set by the server).
        """
        # The server sends this counter as a string; convert as documented.
        return _int_or_none(self._properties.get('totalRows'))

    @property
    def total_bytes_processed(self):
        """Total number of bytes processed by the query

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#totalBytesProcessed

        :rtype: integer, or ``NoneType``
        :returns: Count generated on the server (None until set by the server).
        """
        # The server sends this counter as a string; convert as documented.
        return _int_or_none(self._properties.get('totalBytesProcessed'))

    @property
    def rows(self):
        """Query results.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#rows

        :rtype: list of tuples of row values
        :returns: the ``rows`` property of the last server response,
                  converted via the current :attr:`schema` (an empty
                  sequence of rows is converted until the server sets
                  the property).
        """
        return _rows_from_json(self._properties.get('rows', ()), self.schema)

    @property
    def schema(self):
        """Schema for query results.

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#schema

        :rtype: list of :class:`SchemaField`, or ``NoneType``
        :returns: fields describing the schema (None until set by the server).
        """
        return _parse_schema_resource(self._properties.get('schema', {}))

    default_dataset = _TypedProperty('default_dataset', Dataset)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#defaultDataset
    """

    dry_run = _TypedProperty('dry_run', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#dryRun
    """

    max_results = _TypedProperty('max_results', six.integer_types)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#maxResults
    """

    preserve_nulls = _TypedProperty('preserve_nulls', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#preserveNulls
    """

    timeout_ms = _TypedProperty('timeout_ms', six.integer_types)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#timeoutMs
    """

    use_query_cache = _TypedProperty('use_query_cache', bool)
    """See:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs/query#useQueryCache
    """

    def _set_properties(self, api_response):
        """Update properties from resource in body of ``api_response``

        Any previously stored properties are discarded first.

        :type api_response: mapping
        :param api_response: response returned from an API call
        """
        self._properties.clear()
        self._properties.update(api_response)

    def _build_resource(self):
        """Generate a resource for :meth:`run`.

        Only options which have been explicitly set (i.e. are not ``None``)
        are included, so that the server applies its defaults for the rest.

        :rtype: dict
        :returns: the request body for the query.
        """
        resource = {'query': self.query}

        if self.default_dataset is not None:
            resource['defaultDataset'] = {
                'projectId': self.project,
                'datasetId': self.default_dataset.name,
            }

        if self.max_results is not None:
            resource['maxResults'] = self.max_results

        if self.preserve_nulls is not None:
            resource['preserveNulls'] = self.preserve_nulls

        if self.timeout_ms is not None:
            resource['timeoutMs'] = self.timeout_ms

        if self.use_query_cache is not None:
            resource['useQueryCache'] = self.use_query_cache

        if self.dry_run is not None:
            resource['dryRun'] = self.dry_run

        return resource

    def run(self, client=None):
        """API call:  run the query via a POST request

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/query

        :type client: :class:`gcloud.bigquery.client.Client` or ``NoneType``
        :param client: the client to use.  If not passed, falls back to the
                       ``client`` stored on the current query.
        """
        client = self._require_client(client)
        path = '/projects/%s/queries' % (self.project,)
        api_response = client.connection.api_request(
            method='POST', path=path, data=self._build_resource())
        self._set_properties(api_response)

    def fetch_data(self, max_results=None, page_token=None, start_index=None,
                   timeout_ms=None, client=None):
        """API call:  fetch a page of query result data via a GET request

        See:
        https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults

        The stored properties of this instance are replaced by the response.

        :type max_results: integer or ``NoneType``
        :param max_results: maximum number of rows to return.

        :type page_token: string or ``NoneType``
        :param page_token: token representing a cursor into the table's rows.

        :type start_index: integer or ``NoneType``
        :param start_index: zero-based index of starting row

        :type timeout_ms: integer or ``NoneType``
        :param timeout_ms: timeout, in milliseconds, to wait for query to
                           complete

        :type client: :class:`gcloud.bigquery.client.Client` or ``NoneType``
        :param client: the client to use.  If not passed, falls back to the
                       ``client`` stored on the current query.

        :rtype: tuple
        :returns: ``(row_data, total_rows, page_token)``, where ``row_data``
                  is a list of tuples, one per result row, containing only
                  the values;  ``total_rows`` is an integer count of the
                  total number of rows in the query results (``None`` if the
                  server did not report it); and ``page_token`` is an opaque
                  string which can be used to fetch the next batch of rows
                  (``None`` if no further batches can be fetched).
        :raises: ValueError if the query has not yet been executed.
        """
        if self.name is None:
            raise ValueError("Query not yet executed:  call 'run()'")

        client = self._require_client(client)
        params = {}

        if max_results is not None:
            params['maxResults'] = max_results

        if page_token is not None:
            params['pageToken'] = page_token

        if start_index is not None:
            params['startIndex'] = start_index

        if timeout_ms is not None:
            params['timeoutMs'] = timeout_ms

        path = '/projects/%s/queries/%s' % (self.project, self.name)
        response = client.connection.api_request(method='GET',
                                                 path=path,
                                                 query_params=params)
        self._set_properties(response)

        # ``totalRows`` arrives as a string; return the documented integer.
        total_rows = _int_or_none(response.get('totalRows'))
        page_token = response.get('pageToken')
        rows_data = _rows_from_json(response.get('rows', ()), self.schema)

        return rows_data, total_rows, page_token