From 9f723b9bbae04cc28b7e2caacf9626b3932709dc Mon Sep 17 00:00:00 2001 From: HemangChothani Date: Thu, 16 Jan 2020 19:15:45 +0530 Subject: [PATCH 1/3] refactor(bigquery): update code samples and move to own sample file --- bigquery/docs/snippets.py | 683 ------------------ bigquery/docs/usage/encryption.rst | 6 +- bigquery/docs/usage/jobs.rst | 24 + bigquery/docs/usage/pandas.rst | 4 +- bigquery/docs/usage/tables.rst | 50 +- bigquery/samples/cancel_job.py | 39 + bigquery/samples/client_query_total_rows.py | 37 + bigquery/samples/create_table_cmek.py | 43 ++ bigquery/samples/extract_table.py | 63 ++ bigquery/samples/extract_table_compressed.py | 59 ++ bigquery/samples/extract_table_json.py | 59 ++ bigquery/samples/get_job.py | 39 + bigquery/samples/list_rows_as_dataframe.py | 38 + bigquery/samples/load_table_add_column.py | 70 ++ bigquery/samples/load_table_file.py | 43 ++ bigquery/samples/load_table_relax_column.py | 76 ++ bigquery/samples/load_table_uri_avro.py | 38 + bigquery/samples/load_table_uri_cmek.py | 60 ++ bigquery/samples/load_table_uri_csv.py | 46 ++ bigquery/samples/load_table_uri_json.py | 46 ++ bigquery/samples/load_table_uri_orc.py | 38 + bigquery/samples/load_table_uri_parquet.py | 37 + .../query_external_gcs_permanent_table.py | 57 ++ .../samples/query_results_as_dataframe.py | 41 ++ bigquery/samples/tests/test_cancel_job.py | 23 + .../tests/test_client_query_total_rows.py | 22 + .../samples/tests/test_create_table_cmek.py | 22 + bigquery/samples/tests/test_extract_table.py | 27 + .../tests/test_extract_table_compressed.py | 27 + .../samples/tests/test_extract_table_json.py | 27 + bigquery/samples/tests/test_get_job.py | 23 + .../tests/test_list_rows_as_dataframe.py | 26 + .../tests/test_load_table_add_column.py | 33 + .../samples/tests/test_load_table_file.py | 38 + .../tests/test_load_table_relax_column.py | 20 + .../samples/tests/test_load_table_uri_avro.py | 21 + .../samples/tests/test_load_table_uri_cmek.py | 22 + 
.../samples/tests/test_load_table_uri_csv.py | 22 + .../samples/tests/test_load_table_uri_json.py | 22 + .../samples/tests/test_load_table_uri_orc.py | 22 + .../tests/test_load_table_uri_parquet.py | 22 + ...test_query_external_gcs_permanent_table.py | 23 + .../tests/test_query_results_as_dataframe.py | 26 + .../samples/tests/test_update_table_cmek.py | 22 + .../tests/test_update_table_description.py | 22 + bigquery/samples/update_table_cmek.py | 48 ++ bigquery/samples/update_table_description.py | 44 ++ 47 files changed, 1606 insertions(+), 694 deletions(-) create mode 100644 bigquery/samples/cancel_job.py create mode 100644 bigquery/samples/client_query_total_rows.py create mode 100644 bigquery/samples/create_table_cmek.py create mode 100644 bigquery/samples/extract_table.py create mode 100644 bigquery/samples/extract_table_compressed.py create mode 100644 bigquery/samples/extract_table_json.py create mode 100644 bigquery/samples/get_job.py create mode 100644 bigquery/samples/list_rows_as_dataframe.py create mode 100644 bigquery/samples/load_table_add_column.py create mode 100644 bigquery/samples/load_table_file.py create mode 100644 bigquery/samples/load_table_relax_column.py create mode 100644 bigquery/samples/load_table_uri_avro.py create mode 100644 bigquery/samples/load_table_uri_cmek.py create mode 100644 bigquery/samples/load_table_uri_csv.py create mode 100644 bigquery/samples/load_table_uri_json.py create mode 100644 bigquery/samples/load_table_uri_orc.py create mode 100644 bigquery/samples/load_table_uri_parquet.py create mode 100644 bigquery/samples/query_external_gcs_permanent_table.py create mode 100644 bigquery/samples/query_results_as_dataframe.py create mode 100644 bigquery/samples/tests/test_cancel_job.py create mode 100644 bigquery/samples/tests/test_client_query_total_rows.py create mode 100644 bigquery/samples/tests/test_create_table_cmek.py create mode 100644 bigquery/samples/tests/test_extract_table.py create mode 100644 
bigquery/samples/tests/test_extract_table_compressed.py create mode 100644 bigquery/samples/tests/test_extract_table_json.py create mode 100644 bigquery/samples/tests/test_get_job.py create mode 100644 bigquery/samples/tests/test_list_rows_as_dataframe.py create mode 100644 bigquery/samples/tests/test_load_table_add_column.py create mode 100644 bigquery/samples/tests/test_load_table_file.py create mode 100644 bigquery/samples/tests/test_load_table_relax_column.py create mode 100644 bigquery/samples/tests/test_load_table_uri_avro.py create mode 100644 bigquery/samples/tests/test_load_table_uri_cmek.py create mode 100644 bigquery/samples/tests/test_load_table_uri_csv.py create mode 100644 bigquery/samples/tests/test_load_table_uri_json.py create mode 100644 bigquery/samples/tests/test_load_table_uri_orc.py create mode 100644 bigquery/samples/tests/test_load_table_uri_parquet.py create mode 100644 bigquery/samples/tests/test_query_external_gcs_permanent_table.py create mode 100644 bigquery/samples/tests/test_query_results_as_dataframe.py create mode 100644 bigquery/samples/tests/test_update_table_cmek.py create mode 100644 bigquery/samples/tests/test_update_table_description.py create mode 100644 bigquery/samples/update_table_cmek.py create mode 100644 bigquery/samples/update_table_description.py diff --git a/bigquery/docs/snippets.py b/bigquery/docs/snippets.py index bb584fa0494a..d4aa354a50c5 100644 --- a/bigquery/docs/snippets.py +++ b/bigquery/docs/snippets.py @@ -161,35 +161,6 @@ def test_create_table_nested_repeated_schema(client, to_delete): # [END bigquery_nested_repeated_schema] -def test_create_table_cmek(client, to_delete): - dataset_id = "create_table_cmek_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_create_table_cmek] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - table_ref = 
client.dataset(dataset_id).table("my_table") - table = bigquery.Table(table_ref) - - # Set the encryption key to use for the table. - # TODO: Replace this key with a key you have created in Cloud KMS. - kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( - "cloud-samples-tests", "us", "test", "test" - ) - table.encryption_configuration = bigquery.EncryptionConfiguration( - kms_key_name=kms_key_name - ) - - table = client.create_table(table) # API request - - assert table.encryption_configuration.kms_key_name == kms_key_name - # [END bigquery_create_table_cmek] - - def test_create_partitioned_table(client, to_delete): dataset_id = "create_table_partitioned_{}".format(_millis()) dataset_ref = bigquery.Dataset(client.dataset(dataset_id)) @@ -297,39 +268,6 @@ def test_manage_table_labels(client, to_delete): # [END bigquery_delete_label_table] -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_update_table_description(client, to_delete): - """Update a table's description.""" - dataset_id = "update_table_description_dataset_{}".format(_millis()) - table_id = "update_table_description_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - table.description = "Original description." - table = client.create_table(table) - - # [START bigquery_update_table_description] - # from google.cloud import bigquery - # client = bigquery.Client() - # table_ref = client.dataset('my_dataset').table('my_table') - # table = client.get_table(table_ref) # API request - - assert table.description == "Original description." - table.description = "Updated description." - - table = client.update_table(table, ["description"]) # API request - - assert table.description == "Updated description." 
- # [END bigquery_update_table_description] - - @pytest.mark.skip( reason=( "update_table() is flaky " @@ -411,51 +349,6 @@ def test_relax_column(client, to_delete): # [END bigquery_relax_column] -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_update_table_cmek(client, to_delete): - """Patch a table's metadata.""" - dataset_id = "update_table_cmek_{}".format(_millis()) - table_id = "update_table_cmek_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - table = bigquery.Table(dataset.table(table_id)) - original_kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( - "cloud-samples-tests", "us", "test", "test" - ) - table.encryption_configuration = bigquery.EncryptionConfiguration( - kms_key_name=original_kms_key_name - ) - table = client.create_table(table) - - # [START bigquery_update_table_cmek] - # from google.cloud import bigquery - # client = bigquery.Client() - - assert table.encryption_configuration.kms_key_name == original_kms_key_name - - # Set a new encryption key to use for the destination. - # TODO: Replace this key with a key you have created in KMS. 
- updated_kms_key_name = ( - "projects/cloud-samples-tests/locations/us/keyRings/test/cryptoKeys/otherkey" - ) - table.encryption_configuration = bigquery.EncryptionConfiguration( - kms_key_name=updated_kms_key_name - ) - - table = client.update_table(table, ["encryption_configuration"]) # API request - - assert table.encryption_configuration.kms_key_name == updated_kms_key_name - assert original_kms_key_name != updated_kms_key_name - # [END bigquery_update_table_cmek] - - @pytest.mark.skip( reason=( "update_table() is flaky " @@ -581,269 +474,6 @@ def test_manage_views(client, to_delete): # [END bigquery_grant_view_access] -def test_load_table_from_file(client, to_delete): - """Upload table data from a CSV file.""" - dataset_id = "load_table_from_file_dataset_{}".format(_millis()) - table_id = "load_table_from_file_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.location = "US" - client.create_dataset(dataset) - to_delete.append(dataset) - snippets_dir = os.path.abspath(os.path.dirname(__file__)) - filename = os.path.join( - snippets_dir, "..", "..", "bigquery", "tests", "data", "people.csv" - ) - - # [START bigquery_load_from_file] - # from google.cloud import bigquery - # client = bigquery.Client() - # filename = '/path/to/file.csv' - # dataset_id = 'my_dataset' - # table_id = 'my_table' - - dataset_ref = client.dataset(dataset_id) - table_ref = dataset_ref.table(table_id) - job_config = bigquery.LoadJobConfig() - job_config.source_format = bigquery.SourceFormat.CSV - job_config.skip_leading_rows = 1 - job_config.autodetect = True - - with open(filename, "rb") as source_file: - job = client.load_table_from_file(source_file, table_ref, job_config=job_config) - - job.result() # Waits for table load to complete. 
- - print("Loaded {} rows into {}:{}.".format(job.output_rows, dataset_id, table_id)) - # [END bigquery_load_from_file] - - table = client.get_table(table_ref) - rows = list(client.list_rows(table)) # API request - - assert len(rows) == 2 - # Order is not preserved, so compare individually - row1 = bigquery.Row(("Wylma Phlyntstone", 29), {"full_name": 0, "age": 1}) - assert row1 in rows - row2 = bigquery.Row(("Phred Phlyntstone", 32), {"full_name": 0, "age": 1}) - assert row2 in rows - - -def test_load_table_from_uri_avro(client, to_delete, capsys): - dataset_id = "load_table_from_uri_avro_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_load_table_gcs_avro] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.source_format = bigquery.SourceFormat.AVRO - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro" - - load_job = client.load_table_from_uri( - uri, dataset_ref.table("us_states"), job_config=job_config - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(dataset_ref.table("us_states")) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_avro] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." 
in out - - -def test_load_table_from_uri_csv(client, to_delete, capsys): - dataset_id = "load_table_from_uri_csv_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_load_table_gcs_csv] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - job_config.skip_leading_rows = 1 - # The source format defaults to CSV, so the line below is optional. - job_config.source_format = bigquery.SourceFormat.CSV - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - - load_job = client.load_table_from_uri( - uri, dataset_ref.table("us_states"), job_config=job_config - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(dataset_ref.table("us_states")) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_csv] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." 
in out - - -def test_load_table_from_uri_json(client, to_delete, capsys): - dataset_id = "load_table_from_uri_json_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.location = "US" - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_load_table_gcs_json] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" - - load_job = client.load_table_from_uri( - uri, - dataset_ref.table("us_states"), - location="US", # Location must match that of the destination dataset. - job_config=job_config, - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(dataset_ref.table("us_states")) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_json] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." 
in out - - -def test_load_table_from_uri_cmek(client, to_delete): - dataset_id = "load_table_from_uri_cmek_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.location = "US" - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_load_table_gcs_json_cmek] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.autodetect = True - job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - - # Set the encryption key to use for the destination. - # TODO: Replace this key with a key you have created in KMS. - kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( - "cloud-samples-tests", "us", "test", "test" - ) - encryption_config = bigquery.EncryptionConfiguration(kms_key_name=kms_key_name) - job_config.destination_encryption_configuration = encryption_config - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" - - load_job = client.load_table_from_uri( - uri, - dataset_ref.table("us_states"), - location="US", # Location must match that of the destination dataset. - job_config=job_config, - ) # API request - - assert load_job.job_type == "load" - - load_job.result() # Waits for table load to complete. 
- - assert load_job.state == "DONE" - table = client.get_table(dataset_ref.table("us_states")) - assert table.encryption_configuration.kms_key_name == kms_key_name - # [END bigquery_load_table_gcs_json_cmek] - - -def test_load_table_from_uri_parquet(client, to_delete, capsys): - dataset_id = "load_table_from_uri_parquet_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_load_table_gcs_parquet] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.source_format = bigquery.SourceFormat.PARQUET - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" - - load_job = client.load_table_from_uri( - uri, dataset_ref.table("us_states"), job_config=job_config - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(dataset_ref.table("us_states")) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_parquet] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." 
in out - - -def test_load_table_from_uri_orc(client, to_delete, capsys): - dataset_id = "load_table_from_uri_orc_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_load_table_gcs_orc] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.source_format = bigquery.SourceFormat.ORC - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc" - - load_job = client.load_table_from_uri( - uri, dataset_ref.table("us_states"), job_config=job_config - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(dataset_ref.table("us_states")) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_orc] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." in out - - def test_load_table_from_uri_autodetect(client, to_delete, capsys): """Load table from a GCS URI using various formats and auto-detected schema Each file format has its own tested load from URI sample. Because most of @@ -1007,74 +637,6 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys): assert "Loaded 50 rows." 
in out -def test_load_table_add_column(client, to_delete): - dataset_id = "load_table_add_column_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) - dataset = bigquery.Dataset(dataset_ref) - dataset.location = "US" - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - snippets_dir = os.path.abspath(os.path.dirname(__file__)) - filepath = os.path.join( - snippets_dir, "..", "..", "bigquery", "tests", "data", "people.csv" - ) - table_ref = dataset_ref.table("my_table") - old_schema = [bigquery.SchemaField("full_name", "STRING", mode="REQUIRED")] - table = client.create_table(bigquery.Table(table_ref, schema=old_schema)) - - # [START bigquery_add_column_load_append] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') - # filepath = 'path/to/your_file.csv' - - # Retrieves the destination table and checks the length of the schema - table_id = "my_table" - table_ref = dataset_ref.table(table_id) - table = client.get_table(table_ref) - print("Table {} contains {} columns.".format(table_id, len(table.schema))) - - # Configures the load job to append the data to the destination table, - # allowing field addition - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND - job_config.schema_update_options = [ - bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION - ] - # In this example, the existing table contains only the 'full_name' column. - # 'REQUIRED' fields cannot be added to an existing schema, so the - # additional column must be 'NULLABLE'. 
- job_config.schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"), - ] - job_config.source_format = bigquery.SourceFormat.CSV - job_config.skip_leading_rows = 1 - - with open(filepath, "rb") as source_file: - job = client.load_table_from_file( - source_file, - table_ref, - location="US", # Must match the destination dataset location. - job_config=job_config, - ) # API request - - job.result() # Waits for table load to complete. - print( - "Loaded {} rows into {}:{}.".format( - job.output_rows, dataset_id, table_ref.table_id - ) - ) - - # Checks the updated length of the schema - table = client.get_table(table) - print("Table {} now contains {} columns.".format(table_id, len(table.schema))) - # [END bigquery_add_column_load_append] - assert len(table.schema) == 2 - assert table.num_rows > 0 - - def test_load_table_relax_column(client, to_delete): dataset_id = "load_table_relax_column_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) @@ -1152,213 +714,6 @@ def test_load_table_relax_column(client, to_delete): assert table.num_rows > 0 -def test_extract_table(client, to_delete): - bucket_name = "extract_shakespeare_{}".format(_millis()) - storage_client = storage.Client() - bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name) - to_delete.append(bucket) - - # [START bigquery_extract_table] - # from google.cloud import bigquery - # client = bigquery.Client() - # bucket_name = 'my-bucket' - project = "bigquery-public-data" - dataset_id = "samples" - table_id = "shakespeare" - - destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv") - dataset_ref = client.dataset(dataset_id, project=project) - table_ref = dataset_ref.table(table_id) - - extract_job = client.extract_table( - table_ref, - destination_uri, - # Location must match that of the source table. - location="US", - ) # API request - extract_job.result() # Waits for job to complete. 
- - print( - "Exported {}:{}.{} to {}".format(project, dataset_id, table_id, destination_uri) - ) - # [END bigquery_extract_table] - - blob = retry_storage_errors(bucket.get_blob)("shakespeare.csv") - assert blob.exists - assert blob.size > 0 - to_delete.insert(0, blob) - - -def test_extract_table_json(client, to_delete): - bucket_name = "extract_shakespeare_json_{}".format(_millis()) - storage_client = storage.Client() - bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name) - to_delete.append(bucket) - - # [START bigquery_extract_table_json] - # from google.cloud import bigquery - # client = bigquery.Client() - # bucket_name = 'my-bucket' - - destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.json") - dataset_ref = client.dataset("samples", project="bigquery-public-data") - table_ref = dataset_ref.table("shakespeare") - job_config = bigquery.job.ExtractJobConfig() - job_config.destination_format = bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON - - extract_job = client.extract_table( - table_ref, - destination_uri, - job_config=job_config, - # Location must match that of the source table. - location="US", - ) # API request - extract_job.result() # Waits for job to complete. 
- # [END bigquery_extract_table_json] - - blob = retry_storage_errors(bucket.get_blob)("shakespeare.json") - assert blob.exists - assert blob.size > 0 - to_delete.insert(0, blob) - - -def test_extract_table_compressed(client, to_delete): - bucket_name = "extract_shakespeare_compress_{}".format(_millis()) - storage_client = storage.Client() - bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name) - to_delete.append(bucket) - - # [START bigquery_extract_table_compressed] - # from google.cloud import bigquery - # client = bigquery.Client() - # bucket_name = 'my-bucket' - - destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv.gz") - dataset_ref = client.dataset("samples", project="bigquery-public-data") - table_ref = dataset_ref.table("shakespeare") - job_config = bigquery.job.ExtractJobConfig() - job_config.compression = bigquery.Compression.GZIP - - extract_job = client.extract_table( - table_ref, - destination_uri, - # Location must match that of the source table. - location="US", - job_config=job_config, - ) # API request - extract_job.result() # Waits for job to complete. - # [END bigquery_extract_table_compressed] - - blob = retry_storage_errors(bucket.get_blob)("shakespeare.csv.gz") - assert blob.exists - assert blob.size > 0 - to_delete.insert(0, blob) - - -def test_client_query_total_rows(client, capsys): - """Run a query and just check for how many rows.""" - # [START bigquery_query_total_rows] - # from google.cloud import bigquery - # client = bigquery.Client() - - query = ( - "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` " - 'WHERE state = "TX" ' - "LIMIT 100" - ) - query_job = client.query( - query, - # Location must match that of the dataset(s) referenced in the query. - location="US", - ) # API request - starts the query - - results = query_job.result() # Wait for query to complete. 
- print("Got {} rows.".format(results.total_rows)) - # [END bigquery_query_total_rows] - - out, _ = capsys.readouterr() - assert "Got 100 rows." in out - - -def test_manage_job(client): - sql = """ - SELECT corpus - FROM `bigquery-public-data.samples.shakespeare` - GROUP BY corpus; - """ - location = "us" - job = client.query(sql, location=location) - job_id = job.job_id - - # [START bigquery_cancel_job] - # TODO(developer): Uncomment the lines below and replace with your values. - # from google.cloud import bigquery - # client = bigquery.Client() - # job_id = 'bq-job-123x456-123y123z123c' # replace with your job ID - # location = 'us' # replace with your location - - job = client.cancel_job(job_id, location=location) - # [END bigquery_cancel_job] - - # [START bigquery_get_job] - # TODO(developer): Uncomment the lines below and replace with your values. - # from google.cloud import bigquery - # client = bigquery.Client() - # job_id = 'bq-job-123x456-123y123z123c' # replace with your job ID - # location = 'us' # replace with your location - - job = client.get_job(job_id, location=location) # API request - - # Print selected job properties - print("Details for job {} running in {}:".format(job_id, location)) - print( - "\tType: {}\n\tState: {}\n\tCreated: {}".format( - job.job_type, job.state, job.created - ) - ) - # [END bigquery_get_job] - - -def test_query_external_gcs_permanent_table(client, to_delete): - dataset_id = "query_external_gcs_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_query_external_gcs_perm] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - # Configure the external data source - dataset_ref = client.dataset(dataset_id) - table_id = "us_states" - schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - table = 
bigquery.Table(dataset_ref.table(table_id), schema=schema) - external_config = bigquery.ExternalConfig("CSV") - external_config.source_uris = [ - "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - ] - external_config.options.skip_leading_rows = 1 # optionally skip header row - table.external_data_configuration = external_config - - # Create a permanent table linked to the GCS file - table = client.create_table(table) # API request - - # Example query to find states starting with 'W' - sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id) - - query_job = client.query(sql) # API request - - w_states = list(query_job) # Waits for query to finish - print("There are {} states with names starting with W.".format(len(w_states))) - # [END bigquery_query_external_gcs_perm] - assert len(w_states) == 4 - - def test_ddl_create_view(client, to_delete, capsys): """Create a view via a DDL query.""" project = client.project @@ -1416,43 +771,5 @@ def test_ddl_create_view(client, to_delete, capsys): assert len(df) == 0 -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_query_results_as_dataframe(client): - # [START bigquery_query_results_dataframe] - # from google.cloud import bigquery - # client = bigquery.Client() - - sql = """ - SELECT name, SUM(number) as count - FROM `bigquery-public-data.usa_names.usa_1910_current` - GROUP BY name - ORDER BY count DESC - LIMIT 10 - """ - - df = client.query(sql).to_dataframe() - # [END bigquery_query_results_dataframe] - assert isinstance(df, pandas.DataFrame) - assert len(list(df)) == 2 # verify the number of columns - assert len(df) == 10 # verify the number of rows - - -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_list_rows_as_dataframe(client): - # [START bigquery_list_rows_dataframe] - # from google.cloud import bigquery - # client = bigquery.Client() - - dataset_ref = client.dataset("samples", project="bigquery-public-data") - table_ref = 
dataset_ref.table("shakespeare") - table = client.get_table(table_ref) - - df = client.list_rows(table).to_dataframe() - # [END bigquery_list_rows_dataframe] - assert isinstance(df, pandas.DataFrame) - assert len(list(df)) == len(table.schema) # verify the number of columns - assert len(df) == table.num_rows # verify the number of rows - - if __name__ == "__main__": pytest.main() diff --git a/bigquery/docs/usage/encryption.rst b/bigquery/docs/usage/encryption.rst index b512e6c4d7bf..7cae46c4654e 100644 --- a/bigquery/docs/usage/encryption.rst +++ b/bigquery/docs/usage/encryption.rst @@ -10,7 +10,7 @@ in the BigQuery documentation for more details. Create a new table, using a customer-managed encryption key from Cloud KMS to encrypt it. -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/create_table_cmek.py :language: python :dedent: 4 :start-after: [START bigquery_create_table_cmek] @@ -18,7 +18,7 @@ Cloud KMS to encrypt it. Change the key used to encrypt a table. -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/update_table_cmek.py :language: python :dedent: 4 :start-after: [START bigquery_update_table_cmek] @@ -27,7 +27,7 @@ Change the key used to encrypt a table. Load a file from Cloud Storage, using a customer-managed encryption key from Cloud KMS for the destination table. -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/load_table_uri_cmek.py :language: python :dedent: 4 :start-after: [START bigquery_load_table_gcs_json_cmek] diff --git a/bigquery/docs/usage/jobs.rst b/bigquery/docs/usage/jobs.rst index c3dd71031bfc..fc8624186ef1 100644 --- a/bigquery/docs/usage/jobs.rst +++ b/bigquery/docs/usage/jobs.rst @@ -19,3 +19,27 @@ List jobs for a project with the :dedent: 4 :start-after: [START bigquery_list_jobs] :end-before: [END bigquery_list_jobs] + +Getting a Job +^^^^^^^^^^^^^^^ + +Get a job resource with the +:func:`~google.cloud.bigquery.client.Client.get_job` method: + +.. 
+Cancelling a Job
+^^^^^^^^^^^^^^^^
+Load an Avro file from Cloud Storage:
literalinclude:: ../samples/update_table_description.py :language: python :dedent: 4 :start-after: [START bigquery_update_table_description] @@ -169,12 +191,28 @@ Copy a table with the Copy table data to Google Cloud Storage with the :func:`~google.cloud.bigquery.client.Client.extract_table` method: -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/extract_table.py :language: python :dedent: 4 :start-after: [START bigquery_extract_table] :end-before: [END bigquery_extract_table] +Copy table data to Google Cloud Storage json file: + +.. literalinclude:: ../samples/extract_table_json.py + :language: python + :dedent: 4 + :start-after: [START bigquery_extract_table_json] + :end-before: [END bigquery_extract_table_json] + +Copy table data to Google Cloud Storage compressed file: + +.. literalinclude:: ../samples/extract_table_compressed.py + :language: python + :dedent: 4 + :start-after: [START bigquery_extract_table_compressed] + :end-before: [END bigquery_extract_table_compressed] + Deleting a Table ^^^^^^^^^^^^^^^^ diff --git a/bigquery/samples/cancel_job.py b/bigquery/samples/cancel_job.py new file mode 100644 index 000000000000..5cd3c2f82fa7 --- /dev/null +++ b/bigquery/samples/cancel_job.py @@ -0,0 +1,39 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def cancel_job(): + + # [START bigquery_cancel_job] + from google.cloud import bigquery + + # Construct a BigQuery client object. 
+    # [START bigquery_query_total_rows]
+ print("Got {} rows.".format(results.total_rows)) + # [END bigquery_query_total_rows] diff --git a/bigquery/samples/create_table_cmek.py b/bigquery/samples/create_table_cmek.py new file mode 100644 index 000000000000..b2f786b3df2f --- /dev/null +++ b/bigquery/samples/create_table_cmek.py @@ -0,0 +1,43 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_table_cmek(table_id, kms_key_name): + + # [START bigquery_create_table_cmek] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + # Set the encryption key to use for the destination. + # TODO: Replace this key with a key you have created in KMS. 
+ # kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + # "cloud-samples-tests", "us", "test", "test" + # ) + + table = bigquery.Table(table_id) + table.encryption_configuration = bigquery.EncryptionConfiguration( + kms_key_name=kms_key_name + ) + + table = client.create_table(table) # API request + + if table.encryption_configuration.kms_key_name == kms_key_name: + print("A table created with encryption configuration key") + + # [END bigquery_create_table_cmek] diff --git a/bigquery/samples/extract_table.py b/bigquery/samples/extract_table.py new file mode 100644 index 000000000000..5809ec31435e --- /dev/null +++ b/bigquery/samples/extract_table.py @@ -0,0 +1,63 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def extract_table(table_id): + + # [START bigquery_extract_table] + from google.cloud import bigquery + from google.cloud import storage + + import time + + # Construct a BigQuery client object. + client = bigquery.Client() + + # Construct a Storage client object. + storage_client = storage.Client() + + # TODO(developer): Set table_id to the ID of the model to fetch. 
+ # table_id = 'your-project.your_dataset.your_table' + + bucket_name = "extract_shakespeare_{}".format(int(time.time() * 1000)) + bucket = storage_client.create_bucket(bucket_name) + + destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv") + + table = bigquery.Table( + table_id, + schema=[ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ], + ) + table = client.create_table(table) + + extract_job = client.extract_table( + table, + destination_uri, + # Location must match that of the source table. + location="US", + ) # API request + extract_job.result() # Waits for job to complete. + + print( + "Exported {}.{}.{} to {}".format( + table.project, table.dataset_id, table.table_id, destination_uri + ) + ) + # [END bigquery_extract_table] + + blob = bucket.get_blob("shakespeare.csv") + return blob, bucket diff --git a/bigquery/samples/extract_table_compressed.py b/bigquery/samples/extract_table_compressed.py new file mode 100644 index 000000000000..d6bf21f54f65 --- /dev/null +++ b/bigquery/samples/extract_table_compressed.py @@ -0,0 +1,59 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def extract_table_compressed(table_id): + + # [START bigquery_extract_table_compressed] + from google.cloud import bigquery + from google.cloud import storage + + import time + + # Construct a BigQuery client object. 
+    job_config = bigquery.job.ExtractJobConfig(
+        compression=bigquery.Compression.GZIP
+    )
+ + +def extract_table_json(table_id): + + # [START bigquery_extract_table_json] + from google.cloud import bigquery + from google.cloud import storage + + import time + + # Construct a BigQuery client object. + client = bigquery.Client() + + # Construct a Storage client object. + storage_client = storage.Client() + + # TODO(developer): Set table_id to the ID of the model to fetch. + # table_id = 'your-project.your_dataset.your_table' + + bucket_name = "extract_shakespeare_json_{}".format(int(time.time() * 1000)) + bucket = storage_client.create_bucket(bucket_name) + + destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.json") + + job_config = bigquery.job.ExtractJobConfig( + destination_format=bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON + ) + extract_job = client.extract_table( + table_id, + destination_uri, + job_config=job_config, + # Location must match that of the source table. + location="US", + ) # API request + extract_job.result() # Waits for job to complete. + + table = client.get_table(table_id) + print( + "Exported {}.{}.{} to {}".format( + table.project, table.dataset_id, table.table_id, destination_uri + ) + ) + # [END bigquery_extract_table_json] + + blob = bucket.get_blob("shakespeare.json") + return blob, bucket diff --git a/bigquery/samples/get_job.py b/bigquery/samples/get_job.py new file mode 100644 index 000000000000..37ccf5741aaa --- /dev/null +++ b/bigquery/samples/get_job.py @@ -0,0 +1,39 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +def get_job(): + + # [START bigquery_get_job] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + sql = """ + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus; + """ + location = "us" + job = client.query(sql, location=location) + job_id = job.job_id + + job = client.get_job(job_id, location=location) # API request + print("Details for job {} running in {}:".format(job_id, location)) + + print( + "Type: {}, State: {}, Created: {}".format(job.job_type, job.state, job.created) + ) + # [END bigquery_get_job] diff --git a/bigquery/samples/list_rows_as_dataframe.py b/bigquery/samples/list_rows_as_dataframe.py new file mode 100644 index 000000000000..97262c21c7b9 --- /dev/null +++ b/bigquery/samples/list_rows_as_dataframe.py @@ -0,0 +1,38 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def list_rows_as_dataframe(table_id): + + # [START bigquery_list_rows_dataframe] + + from google.cloud import bigquery + + import pandas + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the model to fetch. 
+    print(
+        "There are {} rows and {} columns in dataframe.".format(len(df), len(list(df)))
+    )
+ # table_id = "your-project.your_dataset.your_table_name" + + old_schema = [bigquery.SchemaField("full_name", "STRING", mode="REQUIRED")] + table = client.create_table(bigquery.Table(table_id, schema=old_schema)) + + # Retrieves the destination table and checks the length of the schema + table = client.get_table(table_id) + print("Table {} contains {} columns.".format(table_id, len(table.schema))) + + # Configures the load job to append the data to the destination table, + # allowing field addition + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_APPEND, + source_format=bigquery.SourceFormat.CSV, + skip_leading_rows=1, + # In this example, the existing table contains only the 'full_name' column. + # 'REQUIRED' fields cannot be added to an existing schema, so the + # additional column must be 'NULLABLE'. + schema=[ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"), + ], + schema_update_options=[bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION], + ) + + with open(file_path, "rb") as source_file: + job = client.load_table_from_file( + source_file, + table_id, + location="US", # Must match the destination dataset location. + job_config=job_config, + ) # API request + + job.result() # Waits for table load to complete. 
+ print( + "Loaded {} rows into {}:{}.".format( + job.output_rows, table.dataset_id, table.table_id + ) + ) + + # Checks the updated length of the schema + table = client.get_table(table) + print("Table {} now contains {} columns.".format(table_id, len(table.schema))) + # [END bigquery_add_column_load_append] + + return table diff --git a/bigquery/samples/load_table_file.py b/bigquery/samples/load_table_file.py new file mode 100644 index 000000000000..f5a0955909f9 --- /dev/null +++ b/bigquery/samples/load_table_file.py @@ -0,0 +1,43 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_file(file_path, table_id): + + # [START bigquery_load_table_file] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.CSV, skip_leading_rows=1, autodetect=True, + ) + + with open(file_path, "rb") as source_file: + job = client.load_table_from_file(source_file, table_id, job_config=job_config) + + job.result() # Waits for table load to complete. + + table = client.get_table(table_id) # Make an API request. 
+ print( + "Loaded {} rows and {} columns to {}".format( + table.num_rows, len(table.schema), table_id + ) + ) + # [END bigquery_load_table_file] + return table diff --git a/bigquery/samples/load_table_relax_column.py b/bigquery/samples/load_table_relax_column.py new file mode 100644 index 000000000000..a7bc94e522c5 --- /dev/null +++ b/bigquery/samples/load_table_relax_column.py @@ -0,0 +1,76 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_relax_column(file_path, table_id): + + # [START bigquery_relax_column_load_append] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + old_schema = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("favorite_color", "STRING", mode="REQUIRED"), + ] + table = client.create_table(bigquery.Table(table_id, schema=old_schema)) + + # Checks the number of required fields + original_required_fields = sum(field.mode == "REQUIRED" for field in table.schema) + # In this example, the existing table has 3 required fields. 
+ print("{} fields in the schema are required.".format(original_required_fields)) + + # Configures the load job to append the data to a destination table, + # allowing field relaxation + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_APPEND, + # In this example, the existing table contains three required fields + # ('full_name', 'age', and 'favorite_color'), while the data to load + # contains only the first two fields. + schema=[ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ], + schema_update_options=[bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION], + source_format=bigquery.SourceFormat.CSV, + skip_leading_rows=1, + ) + + with open(file_path, "rb") as source_file: + job = client.load_table_from_file( + source_file, + table_id, + location="US", # Must match the destination dataset location. + job_config=job_config, + ) # API request + + job.result() # Waits for table load to complete. + print( + "Loaded {} rows into {}:{}.".format( + job.output_rows, table.dataset_id, table.table_id + ) + ) + + # Checks the updated number of required fields + table = client.get_table(table) + current_required_fields = sum(field.mode == "REQUIRED" for field in table.schema) + print("{} fields in the schema are now required.".format(current_required_fields)) + # [END bigquery_relax_column_load_append] + + return table diff --git a/bigquery/samples/load_table_uri_avro.py b/bigquery/samples/load_table_uri_avro.py new file mode 100644 index 000000000000..4195815b3797 --- /dev/null +++ b/bigquery/samples/load_table_uri_avro.py @@ -0,0 +1,38 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+def load_table_uri_avro(table_id):
+# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_cmek(table_id, kms_key_name): + + # [START bigquery_load_table_gcs_json_cmek] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + # Set the encryption key to use for the destination. + # TODO: Replace this key with a key you have created in KMS. + # kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + # "cloud-samples-tests", "us", "test", "test" + # ) + + job_config = bigquery.LoadJobConfig( + autodetect=True, + source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, + destination_encryption_configuration=bigquery.EncryptionConfiguration( + kms_key_name=kms_key_name + ), + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" + + load_job = client.load_table_from_uri( + uri, + table_id, + location="US", # Location must match that of the destination dataset. + job_config=job_config, + ) # API request + + assert load_job.job_type == "load" + + load_job.result() # Waits for table load to complete. + + assert load_job.state == "DONE" + table = client.get_table(table_id) + + if table.encryption_configuration.kms_key_name == kms_key_name: + print("A table loaded with encryption configuration key") + + # [END bigquery_load_table_gcs_json_cmek] diff --git a/bigquery/samples/load_table_uri_csv.py b/bigquery/samples/load_table_uri_csv.py new file mode 100644 index 000000000000..75363dd26345 --- /dev/null +++ b/bigquery/samples/load_table_uri_csv.py @@ -0,0 +1,46 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_csv(table_id): + + # [START bigquery_load_table_gcs_csv] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + skip_leading_rows=1, + # The source format defaults to CSV, so the line below is optional. + source_format=bigquery.SourceFormat.CSV, + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # API request + + load_job.result() # Waits for table load to complete. + + destination_table = client.get_table(table_id) # Make an API request. + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_csv] diff --git a/bigquery/samples/load_table_uri_json.py b/bigquery/samples/load_table_uri_json.py new file mode 100644 index 000000000000..f14085dc447b --- /dev/null +++ b/bigquery/samples/load_table_uri_json.py @@ -0,0 +1,46 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_json(table_id): + # [START bigquery_load_table_gcs_json] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" + + load_job = client.load_table_from_uri( + uri, + table_id, + location="US", # Location must match that of the destination dataset. + job_config=job_config, + ) # API request + + load_job.result() # Waits for table load to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_json] diff --git a/bigquery/samples/load_table_uri_orc.py b/bigquery/samples/load_table_uri_orc.py new file mode 100644 index 000000000000..118981055c06 --- /dev/null +++ b/bigquery/samples/load_table_uri_orc.py @@ -0,0 +1,38 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_orc(table_id): + + # [START bigquery_load_table_gcs_orc] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.ORC) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc" + + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # API request + + load_job.result() # Waits for table load to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_orc] diff --git a/bigquery/samples/load_table_uri_parquet.py b/bigquery/samples/load_table_uri_parquet.py new file mode 100644 index 000000000000..afe846f69c3e --- /dev/null +++ b/bigquery/samples/load_table_uri_parquet.py @@ -0,0 +1,37 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_parquet(table_id): + # [START bigquery_load_table_gcs_parquet] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.PARQUET,) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" + + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # API request + + load_job.result() # Waits for table load to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_parquet] diff --git a/bigquery/samples/query_external_gcs_permanent_table.py b/bigquery/samples/query_external_gcs_permanent_table.py new file mode 100644 index 000000000000..2db90307ce52 --- /dev/null +++ b/bigquery/samples/query_external_gcs_permanent_table.py @@ -0,0 +1,57 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def query_external_gcs_permanent_table(table_id): + + # [START bigquery_query_external_gcs_perm] + from google.cloud import bigquery + + # Construct a BigQuery client object. 
+ client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset to create. + # dataset_id = "{}.your_dataset".format(client.project) + + # TODO(developer): Set table_id to the ID of the model to fetch. + # table_id = 'your-project.your_dataset.your_table' + + table = bigquery.Table( + table_id, + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + external_config = bigquery.ExternalConfig("CSV") + external_config.source_uris = [ + "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + ] + external_config.options.skip_leading_rows = 1 # optionally skip header row + table.external_data_configuration = external_config + + # Create a permanent table linked to the GCS file + table = client.create_table(table) # API request + # Example query to find states starting with 'W' + sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format( + table.dataset_id, table.table_id + ) + + query_job = client.query(sql) # API request + + w_states = list(query_job) # Waits for query to finish + print("There are {} states with names starting with W.".format(len(w_states))) + + # [END bigquery_query_external_gcs_perm] diff --git a/bigquery/samples/query_results_as_dataframe.py b/bigquery/samples/query_results_as_dataframe.py new file mode 100644 index 000000000000..8bd4eeb1466d --- /dev/null +++ b/bigquery/samples/query_results_as_dataframe.py @@ -0,0 +1,41 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +def query_results_as_dataframe(): + + # [START bigquery_query_results_dataframe] + from google.cloud import bigquery + + import pandas + + # Construct a BigQuery client object. + client = bigquery.Client() + + # Run a SQL script. + sql = """ + SELECT name, SUM(number) as count + FROM `bigquery-public-data.usa_names.usa_1910_current` + GROUP BY name + ORDER BY count DESC + LIMIT 10 + """ + + df = client.query(sql).to_dataframe() + + assert isinstance(df, pandas.DataFrame) + print( + "There are {} rows and {} columns in dataframe.".format(len(list(df)), len(df)) + ) + # [END bigquery_query_results_dataframe] diff --git a/bigquery/samples/tests/test_cancel_job.py b/bigquery/samples/tests/test_cancel_job.py new file mode 100644 index 000000000000..7ef56dfc3fc6 --- /dev/null +++ b/bigquery/samples/tests/test_cancel_job.py @@ -0,0 +1,23 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. 
import cancel_job + + +def test_cancel_job(capsys): + + cancel_job.cancel_job() + out, _ = capsys.readouterr() + print("The job has been cancelled") + assert "Type: query, State: DONE," in out diff --git a/bigquery/samples/tests/test_client_query_total_rows.py b/bigquery/samples/tests/test_client_query_total_rows.py new file mode 100644 index 000000000000..4e4fe4bef8e3 --- /dev/null +++ b/bigquery/samples/tests/test_client_query_total_rows.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import client_query_total_rows + + +def test_client_query_total_rows(capsys): + + client_query_total_rows.client_query_total_rows() + out, _ = capsys.readouterr() + assert "Got 100 rows." in out diff --git a/bigquery/samples/tests/test_create_table_cmek.py b/bigquery/samples/tests/test_create_table_cmek.py new file mode 100644 index 000000000000..91cf739ed21c --- /dev/null +++ b/bigquery/samples/tests/test_create_table_cmek.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import create_table_cmek + + +def test_create_table_cmek(capsys, random_table_id, kms_key_name): + + create_table_cmek.create_table_cmek(random_table_id, kms_key_name) + out, err = capsys.readouterr() + assert "A table created with encryption configuration key" in out diff --git a/bigquery/samples/tests/test_extract_table.py b/bigquery/samples/tests/test_extract_table.py new file mode 100644 index 000000000000..d10aac4144f9 --- /dev/null +++ b/bigquery/samples/tests/test_extract_table.py @@ -0,0 +1,27 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import extract_table + + +def test_extract_table(capsys, random_table_id): + + blob, bucket = extract_table.extract_table(random_table_id) + out, _ = capsys.readouterr() + assert "Exported {} ".format(random_table_id) in out + assert blob.exists + assert blob.size > 0 + + blob.delete() + bucket.delete() diff --git a/bigquery/samples/tests/test_extract_table_compressed.py b/bigquery/samples/tests/test_extract_table_compressed.py new file mode 100644 index 000000000000..a6673e73cf66 --- /dev/null +++ b/bigquery/samples/tests/test_extract_table_compressed.py @@ -0,0 +1,27 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import extract_table_compressed + + +def test_extract_table_compressed(capsys, table_with_data_id): + + blob, bucket = extract_table_compressed.extract_table_compressed(table_with_data_id) + out, _ = capsys.readouterr() + assert "Exported {} ".format(table_with_data_id) in out + assert blob.exists + assert blob.size > 0 + + blob.delete() + bucket.delete() diff --git a/bigquery/samples/tests/test_extract_table_json.py b/bigquery/samples/tests/test_extract_table_json.py new file mode 100644 index 000000000000..c436e03dc2a9 --- /dev/null +++ b/bigquery/samples/tests/test_extract_table_json.py @@ -0,0 +1,27 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. 
import extract_table_json + + +def test_extract_table_json(capsys, table_with_data_id): + + blob, bucket = extract_table_json.extract_table_json(table_with_data_id) + out, _ = capsys.readouterr() + assert "Exported {} ".format(table_with_data_id) in out + assert blob.exists + assert blob.size > 0 + + blob.delete() + bucket.delete() diff --git a/bigquery/samples/tests/test_get_job.py b/bigquery/samples/tests/test_get_job.py new file mode 100644 index 000000000000..6f5ebb5d2791 --- /dev/null +++ b/bigquery/samples/tests/test_get_job.py @@ -0,0 +1,23 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import get_job + + +def test_get_job(capsys): + + get_job.get_job() + out, _ = capsys.readouterr() + assert "Details for job" in out + assert "Type: query, State: DONE," in out diff --git a/bigquery/samples/tests/test_list_rows_as_dataframe.py b/bigquery/samples/tests/test_list_rows_as_dataframe.py new file mode 100644 index 000000000000..55e8aebd6d5d --- /dev/null +++ b/bigquery/samples/tests/test_list_rows_as_dataframe.py @@ -0,0 +1,26 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from .. import list_rows_as_dataframe + + +pandas = pytest.importorskip("pandas") + + +def test_list_rows_as_dataframe(capsys, table_with_data_id): + list_rows_as_dataframe.list_rows_as_dataframe(table_with_data_id) + out, _ = capsys.readouterr() + assert "There are 4 rows and 164656 columns in dataframe." in out diff --git a/bigquery/samples/tests/test_load_table_add_column.py b/bigquery/samples/tests/test_load_table_add_column.py new file mode 100644 index 000000000000..100c83afd530 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_add_column.py @@ -0,0 +1,33 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from .. 
import load_table_add_column + + +def test_load_table_add_column(capsys, random_table_id): + + samples_test_dir = os.path.abspath(os.path.dirname(__file__)) + file_path = os.path.join( + samples_test_dir, "..", "..", "tests", "data", "people.csv" + ) + table = load_table_add_column.load_table_add_column(file_path, random_table_id) + out, _ = capsys.readouterr() + assert "Table {} contains 1 columns.".format(random_table_id) in out + assert "Loaded 2 rows into {}:{}.".format(table.dataset_id, table.table_id) + assert "Table {} now contains 2 columns.".format(table.table_id) + + assert len(table.schema) == 2 + assert table.num_rows > 0 diff --git a/bigquery/samples/tests/test_load_table_file.py b/bigquery/samples/tests/test_load_table_file.py new file mode 100644 index 000000000000..979f640a8a14 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_file.py @@ -0,0 +1,38 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from google.cloud import bigquery + +from .. 
import load_table_file + + +def test_load_table_file(capsys, random_table_id, client): + + samples_test_dir = os.path.abspath(os.path.dirname(__file__)) + file_path = os.path.join( + samples_test_dir, "..", "..", "tests", "data", "people.csv" + ) + table = load_table_file.load_table_file(file_path, random_table_id) + + out, _ = capsys.readouterr() + assert "Loaded 2 rows and 2 columns" in out + + rows = list(client.list_rows(table)) # API request + assert len(rows) == 2 + # Order is not preserved, so compare individually + row1 = bigquery.Row(("Wylma Phlyntstone", 29), {"full_name": 0, "age": 1}) + assert row1 in rows + row2 = bigquery.Row(("Phred Phlyntstone", 32), {"full_name": 0, "age": 1}) + assert row2 in rows diff --git a/bigquery/samples/tests/test_load_table_relax_column.py b/bigquery/samples/tests/test_load_table_relax_column.py new file mode 100644 index 000000000000..277e2b92250b --- /dev/null +++ b/bigquery/samples/tests/test_load_table_relax_column.py @@ -0,0 +1,20 @@ +import os + +from .. import load_table_relax_column + + +def test_load_table_relax_column(capsys, random_table_id): + + samples_test_dir = os.path.abspath(os.path.dirname(__file__)) + file_path = os.path.join( + samples_test_dir, "..", "..", "tests", "data", "people.csv" + ) + table = load_table_relax_column.load_table_relax_column(file_path, random_table_id) + out, _ = capsys.readouterr() + assert "3 fields in the schema are required." in out + assert "Loaded 2 rows into {}:{}.".format(table.dataset_id, table.table_id) in out + assert "2 fields in the schema are now required." 
in out + + assert len(table.schema) == 3 + assert table.schema[2].mode == "NULLABLE" + assert table.num_rows > 0 diff --git a/bigquery/samples/tests/test_load_table_uri_avro.py b/bigquery/samples/tests/test_load_table_uri_avro.py new file mode 100644 index 000000000000..3d459ee072bd --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_avro.py @@ -0,0 +1,21 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_avro + + +def test_load_table_uri_avro(capsys, random_table_id): + load_table_uri_avro.load_table_uri_avro(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_load_table_uri_cmek.py b/bigquery/samples/tests/test_load_table_uri_cmek.py new file mode 100644 index 000000000000..c8163d354152 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_cmek.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_cmek + + +def test_load_table_uri_cmek(capsys, random_table_id, kms_key_name): + + load_table_uri_cmek.load_table_uri_cmek(random_table_id, kms_key_name) + out, _ = capsys.readouterr() + assert "A table loaded with encryption configuration key" in out diff --git a/bigquery/samples/tests/test_load_table_uri_csv.py b/bigquery/samples/tests/test_load_table_uri_csv.py new file mode 100644 index 000000000000..87e40abfe09d --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_csv.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_csv + + +def test_load_table_uri_csv(capsys, random_table_id): + + load_table_uri_csv.load_table_uri_csv(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_load_table_uri_json.py b/bigquery/samples/tests/test_load_table_uri_json.py new file mode 100644 index 000000000000..cd25f12a5088 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_json.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_json + + +def test_load_table_uri_json(capsys, random_table_id): + + load_table_uri_json.load_table_uri_json(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_load_table_uri_orc.py b/bigquery/samples/tests/test_load_table_uri_orc.py new file mode 100644 index 000000000000..e55527342703 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_orc.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_orc + + +def test_load_table_uri_orc(capsys, random_table_id): + + load_table_uri_orc.load_table_uri_orc(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." 
in out diff --git a/bigquery/samples/tests/test_load_table_uri_parquet.py b/bigquery/samples/tests/test_load_table_uri_parquet.py new file mode 100644 index 000000000000..166409a77007 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_parquet.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_parquet + + +def test_load_table_uri_parquet(capsys, random_table_id): + + load_table_uri_parquet.load_table_uri_parquet(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_query_external_gcs_permanent_table.py b/bigquery/samples/tests/test_query_external_gcs_permanent_table.py new file mode 100644 index 000000000000..0c1516569f0f --- /dev/null +++ b/bigquery/samples/tests/test_query_external_gcs_permanent_table.py @@ -0,0 +1,23 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. 
import query_external_gcs_permanent_table + + +def test_query_external_gcs_permanent_table(capsys, random_table_id): + query_external_gcs_permanent_table.query_external_gcs_permanent_table( + random_table_id + ) + out, _ = capsys.readouterr() + assert "There are 4 states with names starting with W." in out diff --git a/bigquery/samples/tests/test_query_results_as_dataframe.py b/bigquery/samples/tests/test_query_results_as_dataframe.py new file mode 100644 index 000000000000..52d622b7b4e0 --- /dev/null +++ b/bigquery/samples/tests/test_query_results_as_dataframe.py @@ -0,0 +1,26 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from .. import query_results_as_dataframe + + +pandas = pytest.importorskip("pandas") + + +def test_query_results_as_dataframe(capsys): + query_results_as_dataframe.query_results_as_dataframe() + out, _ = capsys.readouterr() + assert "There are 2 rows and 10 columns in dataframe." in out diff --git a/bigquery/samples/tests/test_update_table_cmek.py b/bigquery/samples/tests/test_update_table_cmek.py new file mode 100644 index 000000000000..a9af4812a928 --- /dev/null +++ b/bigquery/samples/tests/test_update_table_cmek.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import update_table_cmek + + +def test_update_table_cmek(capsys, random_table_id, kms_key_name): + + update_table_cmek.update_table_cmek(random_table_id, kms_key_name, kms_key_name) + out, _ = capsys.readouterr() + assert "A table updated with encryption configuration key" in out diff --git a/bigquery/samples/tests/test_update_table_description.py b/bigquery/samples/tests/test_update_table_description.py new file mode 100644 index 000000000000..da5d13c5438a --- /dev/null +++ b/bigquery/samples/tests/test_update_table_description.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import update_table_description + + +def test_update_table_description(capsys, random_table_id): + + update_table_description.update_table_description(random_table_id) + out, _ = capsys.readouterr() + assert "Updated description." 
in out diff --git a/bigquery/samples/update_table_cmek.py b/bigquery/samples/update_table_cmek.py new file mode 100644 index 000000000000..de6b41c97719 --- /dev/null +++ b/bigquery/samples/update_table_cmek.py @@ -0,0 +1,48 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def update_table_cmek(table_id, old_kms_key_name, new_kms_key_name): + + # [START bigquery_update_table_cmek] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the model to fetch. + # table_id = 'your-project.your_dataset.your_table' + + # Set the encryption key to use for the destination. + # TODO: Replace this key with a key you have created in KMS. 
+ # kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + # "cloud-samples-tests", "us", "test", "test" + # ) + + table = bigquery.Table(table_id) + table.encryption_configuration = bigquery.EncryptionConfiguration( + kms_key_name=old_kms_key_name + ) + + table = client.create_table(table) # API request + + table.encryption_configuration = bigquery.EncryptionConfiguration( + kms_key_name=new_kms_key_name + ) + table = client.update_table(table, ["encryption_configuration"]) # API request + + if table.encryption_configuration.kms_key_name == new_kms_key_name: + print("A table updated with encryption configuration key") + + # [END bigquery_update_table_cmek] diff --git a/bigquery/samples/update_table_description.py b/bigquery/samples/update_table_description.py new file mode 100644 index 000000000000..2c832d7a7a31 --- /dev/null +++ b/bigquery/samples/update_table_description.py @@ -0,0 +1,44 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def update_table_description(table_id): + + # [START bigquery_update_table_description] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the model to fetch. 
def update_table_description(table_id):
    """Create a table, then update its description via ``update_table``.

    Args:
        table_id: Full ID ("project.dataset.table") of the table to create.
    """

    # [START bigquery_update_table_description]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = 'your-project.your_dataset.your_table'

    table = bigquery.Table(
        table_id,
        schema=[
            bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
            bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
        ],
    )
    table = client.create_table(table)  # Make an API request.

    # Only the listed field ("description") is sent in the update request.
    table.description = "Updated description."
    table = client.update_table(table, ["description"])  # Make an API request.

    full_table_id = "{}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    print(
        "Updated table '{}' with description '{}'.".format(
            full_table_id, table.description
        )
    )
    # [END bigquery_update_table_description]
bigquery/samples/load_table_uri_cmek.py | 6 +- bigquery/samples/load_table_uri_csv.py | 4 +- bigquery/samples/load_table_uri_json.py | 6 +- bigquery/samples/load_table_uri_orc.py | 4 +- bigquery/samples/load_table_uri_parquet.py | 4 +- .../samples/load_table_uri_truncate_avro.py | 55 ++ .../samples/load_table_uri_truncate_csv.py | 56 ++ .../samples/load_table_uri_truncate_json.py | 55 ++ .../samples/load_table_uri_truncate_orc.py | 55 ++ .../load_table_uri_truncate_parquet.py | 55 ++ .../query_external_gcs_permanent_table.py | 4 +- bigquery/samples/relax_column.py | 49 ++ .../tests/test_create_partitioned_table.py | 24 + bigquery/samples/tests/test_create_view.py | 21 + .../samples/tests/test_create_view_dll.py | 26 + bigquery/samples/tests/test_get_view.py | 22 + .../samples/tests/test_grant_view_access.py | 22 + .../samples/tests/test_load_table_file.py | 2 +- .../tests/test_load_table_relax_column.py | 14 + .../test_load_table_uri_autodetect_csv.py | 22 + .../test_load_table_uri_autodetect_json.py | 22 + .../test_load_table_uri_truncate_avro.py | 21 + .../tests/test_load_table_uri_truncate_csv.py | 21 + .../test_load_table_uri_truncate_json.py | 21 + .../tests/test_load_table_uri_truncate_orc.py | 21 + .../test_load_table_uri_truncate_parquet.py | 21 + bigquery/samples/tests/test_relax_column.py | 23 + .../tests/test_update_table_expiration.py | 28 + bigquery/samples/tests/test_update_view.py | 21 + bigquery/samples/update_table_cmek.py | 6 +- bigquery/samples/update_table_expiration.py | 49 ++ bigquery/samples/update_view.py | 59 ++ 56 files changed, 1240 insertions(+), 595 deletions(-) create mode 100644 bigquery/samples/create_partitioned_table.py create mode 100644 bigquery/samples/create_view.py create mode 100644 bigquery/samples/create_view_dll.py create mode 100644 bigquery/samples/get_view.py create mode 100644 bigquery/samples/grant_view_access.py create mode 100644 bigquery/samples/load_table_uri_autodetect_csv.py create mode 100644 
bigquery/samples/load_table_uri_autodetect_json.py create mode 100644 bigquery/samples/load_table_uri_truncate_avro.py create mode 100644 bigquery/samples/load_table_uri_truncate_csv.py create mode 100644 bigquery/samples/load_table_uri_truncate_json.py create mode 100644 bigquery/samples/load_table_uri_truncate_orc.py create mode 100644 bigquery/samples/load_table_uri_truncate_parquet.py create mode 100644 bigquery/samples/relax_column.py create mode 100644 bigquery/samples/tests/test_create_partitioned_table.py create mode 100644 bigquery/samples/tests/test_create_view.py create mode 100644 bigquery/samples/tests/test_create_view_dll.py create mode 100644 bigquery/samples/tests/test_get_view.py create mode 100644 bigquery/samples/tests/test_grant_view_access.py create mode 100644 bigquery/samples/tests/test_load_table_uri_autodetect_csv.py create mode 100644 bigquery/samples/tests/test_load_table_uri_autodetect_json.py create mode 100644 bigquery/samples/tests/test_load_table_uri_truncate_avro.py create mode 100644 bigquery/samples/tests/test_load_table_uri_truncate_csv.py create mode 100644 bigquery/samples/tests/test_load_table_uri_truncate_json.py create mode 100644 bigquery/samples/tests/test_load_table_uri_truncate_orc.py create mode 100644 bigquery/samples/tests/test_load_table_uri_truncate_parquet.py create mode 100644 bigquery/samples/tests/test_relax_column.py create mode 100644 bigquery/samples/tests/test_update_table_expiration.py create mode 100644 bigquery/samples/tests/test_update_view.py create mode 100644 bigquery/samples/update_table_expiration.py create mode 100644 bigquery/samples/update_view.py diff --git a/bigquery/docs/snippets.py b/bigquery/docs/snippets.py index d4aa354a50c5..0812df21b42f 100644 --- a/bigquery/docs/snippets.py +++ b/bigquery/docs/snippets.py @@ -161,44 +161,6 @@ def test_create_table_nested_repeated_schema(client, to_delete): # [END bigquery_nested_repeated_schema] -def test_create_partitioned_table(client, to_delete): - 
dataset_id = "create_table_partitioned_{}".format(_millis()) - dataset_ref = bigquery.Dataset(client.dataset(dataset_id)) - dataset = client.create_dataset(dataset_ref) - to_delete.append(dataset) - - # [START bigquery_create_table_partitioned] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') - - table_ref = dataset_ref.table("my_partitioned_table") - schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - bigquery.SchemaField("date", "DATE"), - ] - table = bigquery.Table(table_ref, schema=schema) - table.time_partitioning = bigquery.TimePartitioning( - type_=bigquery.TimePartitioningType.DAY, - field="date", # name of column to use for partitioning - expiration_ms=7776000000, - ) # 90 days - - table = client.create_table(table) - - print( - "Created table {}, partitioned on column {}".format( - table.table_id, table.time_partitioning.field - ) - ) - # [END bigquery_create_table_partitioned] - - assert table.time_partitioning.type_ == "DAY" - assert table.time_partitioning.field == "date" - assert table.time_partitioning.expiration_ms == 7776000000 - - @pytest.mark.skip( reason=( "update_table() is flaky " @@ -268,508 +230,5 @@ def test_manage_table_labels(client, to_delete): # [END bigquery_delete_label_table] -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_update_table_expiration(client, to_delete): - """Update a table's expiration time.""" - dataset_id = "update_table_expiration_dataset_{}".format(_millis()) - table_id = "update_table_expiration_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - table = client.create_table(table) - - # [START bigquery_update_table_expiration] - import 
datetime - import pytz - - # from google.cloud import bigquery - # client = bigquery.Client() - # table_ref = client.dataset('my_dataset').table('my_table') - # table = client.get_table(table_ref) # API request - - assert table.expires is None - - # set table to expire 5 days from now - expiration = datetime.datetime.now(pytz.utc) + datetime.timedelta(days=5) - table.expires = expiration - table = client.update_table(table, ["expires"]) # API request - - # expiration is stored in milliseconds - margin = datetime.timedelta(microseconds=1000) - assert expiration - margin <= table.expires <= expiration + margin - # [END bigquery_update_table_expiration] - - -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_relax_column(client, to_delete): - """Updates a schema field from required to nullable.""" - dataset_id = "relax_column_dataset_{}".format(_millis()) - table_id = "relax_column_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_relax_column] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - # table_id = 'my_table' - - original_schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - table_ref = client.dataset(dataset_id).table(table_id) - table = bigquery.Table(table_ref, schema=original_schema) - table = client.create_table(table) - assert all(field.mode == "REQUIRED" for field in table.schema) - - # SchemaField properties cannot be edited after initialization. - # To make changes, construct new SchemaField objects. 
- relaxed_schema = [ - bigquery.SchemaField("full_name", "STRING", mode="NULLABLE"), - bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"), - ] - table.schema = relaxed_schema - table = client.update_table(table, ["schema"]) - - assert all(field.mode == "NULLABLE" for field in table.schema) - # [END bigquery_relax_column] - - -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_manage_views(client, to_delete): - project = client.project - source_dataset_id = "source_dataset_{}".format(_millis()) - source_dataset_ref = client.dataset(source_dataset_id) - source_dataset = bigquery.Dataset(source_dataset_ref) - source_dataset = client.create_dataset(source_dataset) - to_delete.append(source_dataset) - - job_config = bigquery.LoadJobConfig() - job_config.schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - job_config.skip_leading_rows = 1 - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - source_table_id = "us_states" - load_job = client.load_table_from_uri( - uri, source_dataset.table(source_table_id), job_config=job_config - ) - load_job.result() - - shared_dataset_id = "shared_dataset_{}".format(_millis()) - shared_dataset_ref = client.dataset(shared_dataset_id) - shared_dataset = bigquery.Dataset(shared_dataset_ref) - shared_dataset = client.create_dataset(shared_dataset) - to_delete.append(shared_dataset) - - # [START bigquery_create_view] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = 'my-project' - # source_dataset_id = 'my_source_dataset' - # source_table_id = 'us_states' - # shared_dataset_ref = client.dataset('my_shared_dataset') - - # This example shows how to create a shared view of a source table of - # US States. The source table contains all 50 states, while the view will - # contain only states with names starting with 'W'. 
- view_ref = shared_dataset_ref.table("my_shared_view") - view = bigquery.Table(view_ref) - sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"' - view.view_query = sql_template.format(project, source_dataset_id, source_table_id) - view = client.create_table(view) # API request - - print("Successfully created view at {}".format(view.full_table_id)) - # [END bigquery_create_view] - - # [START bigquery_update_view_query] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = 'my-project' - # source_dataset_id = 'my_source_dataset' - # source_table_id = 'us_states' - # shared_dataset_ref = client.dataset('my_shared_dataset') - - # This example shows how to update a shared view of a source table of - # US States. The view's query will be updated to contain only states with - # names starting with 'M'. - view_ref = shared_dataset_ref.table("my_shared_view") - view = bigquery.Table(view_ref) - sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "M%"' - view.view_query = sql_template.format(project, source_dataset_id, source_table_id) - view = client.update_table(view, ["view_query"]) # API request - # [END bigquery_update_view_query] - - # [START bigquery_get_view] - # from google.cloud import bigquery - # client = bigquery.Client() - # shared_dataset_id = 'my_shared_dataset' - - view_ref = client.dataset(shared_dataset_id).table("my_shared_view") - view = client.get_table(view_ref) # API Request - - # Display view properties - print("View at {}".format(view.full_table_id)) - print("View Query:\n{}".format(view.view_query)) - # [END bigquery_get_view] - assert view.view_query is not None - - analyst_group_email = "example-analyst-group@google.com" - # [START bigquery_grant_view_access] - # from google.cloud import bigquery - # client = bigquery.Client() - - # Assign access controls to the dataset containing the view - # shared_dataset_id = 'my_shared_dataset' - # analyst_group_email = 
'data_analysts@example.com' - shared_dataset = client.get_dataset( - client.dataset(shared_dataset_id) - ) # API request - access_entries = shared_dataset.access_entries - access_entries.append( - bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email) - ) - shared_dataset.access_entries = access_entries - shared_dataset = client.update_dataset( - shared_dataset, ["access_entries"] - ) # API request - - # Authorize the view to access the source dataset - # project = 'my-project' - # source_dataset_id = 'my_source_dataset' - source_dataset = client.get_dataset( - client.dataset(source_dataset_id) - ) # API request - view_reference = { - "projectId": project, - "datasetId": shared_dataset_id, - "tableId": "my_shared_view", - } - access_entries = source_dataset.access_entries - access_entries.append(bigquery.AccessEntry(None, "view", view_reference)) - source_dataset.access_entries = access_entries - source_dataset = client.update_dataset( - source_dataset, ["access_entries"] - ) # API request - # [END bigquery_grant_view_access] - - -def test_load_table_from_uri_autodetect(client, to_delete, capsys): - """Load table from a GCS URI using various formats and auto-detected schema - Each file format has its own tested load from URI sample. Because most of - the code is common for autodetect, append, and truncate, this sample - includes snippets for all supported formats but only calls a single load - job. - This code snippet is made up of shared code, then format-specific code, - followed by more shared code. Note that only the last format in the - format-specific code section will be tested in this test. 
- """ - dataset_id = "load_table_from_uri_auto_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # Shared code - # [START bigquery_load_table_gcs_csv_autodetect] - # [START bigquery_load_table_gcs_json_autodetect] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.autodetect = True - # [END bigquery_load_table_gcs_csv_autodetect] - # [END bigquery_load_table_gcs_json_autodetect] - - # Format-specific code - # [START bigquery_load_table_gcs_csv_autodetect] - job_config.skip_leading_rows = 1 - # The source format defaults to CSV, so the line below is optional. - job_config.source_format = bigquery.SourceFormat.CSV - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - # [END bigquery_load_table_gcs_csv_autodetect] - # unset csv-specific attribute - del job_config._properties["load"]["skipLeadingRows"] - - # [START bigquery_load_table_gcs_json_autodetect] - job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" - # [END bigquery_load_table_gcs_json_autodetect] - - # Shared code - # [START bigquery_load_table_gcs_csv_autodetect] - # [START bigquery_load_table_gcs_json_autodetect] - load_job = client.load_table_from_uri( - uri, dataset_ref.table("us_states"), job_config=job_config - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(dataset_ref.table("us_states")) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_csv_autodetect] - # [END bigquery_load_table_gcs_json_autodetect] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." 
in out - - -def test_load_table_from_uri_truncate(client, to_delete, capsys): - """Replaces table data with data from a GCS URI using various formats - Each file format has its own tested load from URI sample. Because most of - the code is common for autodetect, append, and truncate, this sample - includes snippets for all supported formats but only calls a single load - job. - This code snippet is made up of shared code, then format-specific code, - followed by more shared code. Note that only the last format in the - format-specific code section will be tested in this test. - """ - dataset_id = "load_table_from_uri_trunc_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - job_config = bigquery.LoadJobConfig() - job_config.schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - table_ref = dataset.table("us_states") - body = six.BytesIO(b"Washington,WA") - client.load_table_from_file(body, table_ref, job_config=job_config).result() - previous_rows = client.get_table(table_ref).num_rows - assert previous_rows > 0 - - # Shared code - # [START bigquery_load_table_gcs_avro_truncate] - # [START bigquery_load_table_gcs_csv_truncate] - # [START bigquery_load_table_gcs_json_truncate] - # [START bigquery_load_table_gcs_parquet_truncate] - # [START bigquery_load_table_gcs_orc_truncate] - # from google.cloud import bigquery - # client = bigquery.Client() - # table_ref = client.dataset('my_dataset').table('existing_table') - - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE - # [END bigquery_load_table_gcs_avro_truncate] - # [END bigquery_load_table_gcs_csv_truncate] - # [END bigquery_load_table_gcs_json_truncate] - # [END bigquery_load_table_gcs_parquet_truncate] - # [END bigquery_load_table_gcs_orc_truncate] - - # Format-specific code - # [START 
bigquery_load_table_gcs_avro_truncate] - job_config.source_format = bigquery.SourceFormat.AVRO - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro" - # [END bigquery_load_table_gcs_avro_truncate] - - # [START bigquery_load_table_gcs_csv_truncate] - job_config.skip_leading_rows = 1 - # The source format defaults to CSV, so the line below is optional. - job_config.source_format = bigquery.SourceFormat.CSV - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - # [END bigquery_load_table_gcs_csv_truncate] - # unset csv-specific attribute - del job_config._properties["load"]["skipLeadingRows"] - - # [START bigquery_load_table_gcs_json_truncate] - job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" - # [END bigquery_load_table_gcs_json_truncate] - - # [START bigquery_load_table_gcs_parquet_truncate] - job_config.source_format = bigquery.SourceFormat.PARQUET - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" - # [END bigquery_load_table_gcs_parquet_truncate] - - # [START bigquery_load_table_gcs_orc_truncate] - job_config.source_format = bigquery.SourceFormat.ORC - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc" - # [END bigquery_load_table_gcs_orc_truncate] - - # Shared code - # [START bigquery_load_table_gcs_avro_truncate] - # [START bigquery_load_table_gcs_csv_truncate] - # [START bigquery_load_table_gcs_json_truncate] - # [START bigquery_load_table_gcs_parquet_truncate] - # [START bigquery_load_table_gcs_orc_truncate] - load_job = client.load_table_from_uri( - uri, table_ref, job_config=job_config - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. 
- print("Job finished.") - - destination_table = client.get_table(table_ref) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_avro_truncate] - # [END bigquery_load_table_gcs_csv_truncate] - # [END bigquery_load_table_gcs_json_truncate] - # [END bigquery_load_table_gcs_parquet_truncate] - # [END bigquery_load_table_gcs_orc_truncate] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." in out - - -def test_load_table_relax_column(client, to_delete): - dataset_id = "load_table_relax_column_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) - dataset = bigquery.Dataset(dataset_ref) - dataset.location = "US" - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - snippets_dir = os.path.abspath(os.path.dirname(__file__)) - filepath = os.path.join( - snippets_dir, "..", "..", "bigquery", "tests", "data", "people.csv" - ) - table_ref = dataset_ref.table("my_table") - old_schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - bigquery.SchemaField("favorite_color", "STRING", mode="REQUIRED"), - ] - table = client.create_table(bigquery.Table(table_ref, schema=old_schema)) - - # [START bigquery_relax_column_load_append] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') - # filepath = 'path/to/your_file.csv' - - # Retrieves the destination table and checks the number of required fields - table_id = "my_table" - table_ref = dataset_ref.table(table_id) - table = client.get_table(table_ref) - original_required_fields = sum(field.mode == "REQUIRED" for field in table.schema) - # In this example, the existing table has 3 required fields. 
- print("{} fields in the schema are required.".format(original_required_fields)) - - # Configures the load job to append the data to a destination table, - # allowing field relaxation - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND - job_config.schema_update_options = [ - bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION - ] - # In this example, the existing table contains three required fields - # ('full_name', 'age', and 'favorite_color'), while the data to load - # contains only the first two fields. - job_config.schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - job_config.source_format = bigquery.SourceFormat.CSV - job_config.skip_leading_rows = 1 - - with open(filepath, "rb") as source_file: - job = client.load_table_from_file( - source_file, - table_ref, - location="US", # Must match the destination dataset location. - job_config=job_config, - ) # API request - - job.result() # Waits for table load to complete. 
- print( - "Loaded {} rows into {}:{}.".format( - job.output_rows, dataset_id, table_ref.table_id - ) - ) - - # Checks the updated number of required fields - table = client.get_table(table) - current_required_fields = sum(field.mode == "REQUIRED" for field in table.schema) - print("{} fields in the schema are now required.".format(current_required_fields)) - # [END bigquery_relax_column_load_append] - assert original_required_fields - current_required_fields == 1 - assert len(table.schema) == 3 - assert table.schema[2].mode == "NULLABLE" - assert table.num_rows > 0 - - -def test_ddl_create_view(client, to_delete, capsys): - """Create a view via a DDL query.""" - project = client.project - dataset_id = "ddl_view_{}".format(_millis()) - table_id = "new_view" - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_ddl_create_view] - # from google.cloud import bigquery - # project = 'my-project' - # dataset_id = 'my_dataset' - # table_id = 'new_view' - # client = bigquery.Client(project=project) - - sql = """ - CREATE VIEW `{}.{}.{}` - OPTIONS( - expiration_timestamp=TIMESTAMP_ADD( - CURRENT_TIMESTAMP(), INTERVAL 48 HOUR), - friendly_name="new_view", - description="a view that expires in 2 days", - labels=[("org_unit", "development")] - ) - AS SELECT name, state, year, number - FROM `bigquery-public-data.usa_names.usa_1910_current` - WHERE state LIKE 'W%' - """.format( - project, dataset_id, table_id - ) - - job = client.query(sql) # API request. - job.result() # Waits for the query to finish. 
- - print( - 'Created new view "{}.{}.{}".'.format( - job.destination.project, - job.destination.dataset_id, - job.destination.table_id, - ) - ) - # [END bigquery_ddl_create_view] - - out, _ = capsys.readouterr() - assert 'Created new view "{}.{}.{}".'.format(project, dataset_id, table_id) in out - - # Test that listing query result rows succeeds so that generic query - # processing tools work with DDL statements. - rows = list(job) - assert len(rows) == 0 - - if pandas is not None: - df = job.to_dataframe() - assert len(df) == 0 - - if __name__ == "__main__": pytest.main() diff --git a/bigquery/docs/usage/encryption.rst b/bigquery/docs/usage/encryption.rst index 7cae46c4654e..08505c432d82 100644 --- a/bigquery/docs/usage/encryption.rst +++ b/bigquery/docs/usage/encryption.rst @@ -10,7 +10,7 @@ in the BigQuery documentation for more details. Create a new table, using a customer-managed encryption key from Cloud KMS to encrypt it. -.. literalinclude:: ../samples/create_table_cmek +.. literalinclude:: ../samples/create_table_cmek.py :language: python :dedent: 4 :start-after: [START bigquery_create_table_cmek] diff --git a/bigquery/docs/usage/jobs.rst b/bigquery/docs/usage/jobs.rst index fc8624186ef1..4cc29eab7d95 100644 --- a/bigquery/docs/usage/jobs.rst +++ b/bigquery/docs/usage/jobs.rst @@ -21,7 +21,7 @@ List jobs for a project with the :end-before: [END bigquery_list_jobs] Getting a Job -^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^ Get a job resource with the :func:`~google.cloud.bigquery.client.Client.get_job` method: @@ -33,7 +33,7 @@ Get a job resource with the :end-before: [END bigquery_get_job] Cancelling a Job -^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^ Cancel a job with the :func:`~google.cloud.bigquery.client.Client.cancel_job` method: diff --git a/bigquery/docs/usage/tables.rst b/bigquery/docs/usage/tables.rst index 7a8790a991f6..a86f9834e78c 100644 --- a/bigquery/docs/usage/tables.rst +++ b/bigquery/docs/usage/tables.rst @@ -67,6 +67,15 @@ Create an integer range partitioned 
table with the :start-after: [START bigquery_create_table_range_partitioned] :end-before: [END bigquery_create_table_range_partitioned] +Create a partitioned table with the +:func:`~google.cloud.bigquery.client.Client.create_table` method: + +.. literalinclude:: ../samples/create_partitioned_table.py + :language: python + :dedent: 4 + :start-after: [START bigquery_create_table_partitioned] + :end-before: [END bigquery_create_table_partitioned] + Load table data from a file with the :func:`~google.cloud.bigquery.client.Client.load_table_from_file` method: @@ -132,10 +141,26 @@ Load a ORC file from Cloud Storage: See also: `Loading ORC data from Cloud Storage `_. +Load a autodetect CSV file from Cloud Storage: + +.. literalinclude:: ../samples/load_table_uri_autodetect_csv.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_csv_autodetect] + :end-before: [END bigquery_load_table_gcs_csv_autodetect] + +Load a autodetect JSON file from Cloud Storage: + +.. literalinclude:: ../samples/load_table_uri_autodetect_json.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_json_autodetect] + :end-before: [END bigquery_load_table_gcs_json_autodetect] + Updating a Table ^^^^^^^^^^^^^^^^ -Update a property in a table's metadata with the +Update a description property in a table's metadata with the :func:`~google.cloud.bigquery.client.Client.update_table` method: .. literalinclude:: ../samples/update_table_description.py @@ -144,6 +169,15 @@ Update a property in a table's metadata with the :start-after: [START bigquery_update_table_description] :end-before: [END bigquery_update_table_description] +Update a expire property in a table's metadata with the +:func:`~google.cloud.bigquery.client.Client.update_table` method: + +.. 
literalinclude:: ../samples/update_table_expiration.py + :language: python + :dedent: 4 + :start-after: [START bigquery_update_table_expiration] + :end-before: [END bigquery_update_table_expiration] + Insert rows into a table's data with the :func:`~google.cloud.bigquery.client.Client.insert_rows` method: diff --git a/bigquery/samples/client_list_jobs.py b/bigquery/samples/client_list_jobs.py index b2344e23c7f7..18cc36190a2d 100644 --- a/bigquery/samples/client_list_jobs.py +++ b/bigquery/samples/client_list_jobs.py @@ -16,18 +16,17 @@ def client_list_jobs(): # [START bigquery_list_jobs] + import datetime from google.cloud import bigquery - import datetime - # Construct a BigQuery client object. client = bigquery.Client() # List the 10 most recent jobs in reverse chronological order. # Omit the max_results parameter to list jobs from the past 6 months. print("Last 10 jobs:") - for job in client.list_jobs(max_results=10): # API request(s) + for job in client.list_jobs(max_results=10): # Make an API request.(s) print("{}".format(job.job_id)) # The following are examples of additional optional parameters: diff --git a/bigquery/samples/client_query_total_rows.py b/bigquery/samples/client_query_total_rows.py index e9a4f2fea1f4..18a01f830ff3 100644 --- a/bigquery/samples/client_query_total_rows.py +++ b/bigquery/samples/client_query_total_rows.py @@ -28,10 +28,10 @@ def client_query_total_rows(): ) query_job = client.query( query, - # Location must match that of the dataset(s) referenced in the query. + # Must match the dataset(s) location referenced in the query. location="US", - ) # API request - starts the query + ) # Make an API request. - starts the query - results = query_job.result() # Wait for query to complete. + results = query_job.result() # Wait for the query to complete. 
def create_partitioned_table(table_id):
    """Create a day-partitioned table and report the column it is partitioned on.

    Args:
        table_id: Full ID ("project.dataset.table") of the table to create.

    Returns:
        The created ``google.cloud.bigquery.Table``.
    """

    # [START bigquery_create_table_partitioned]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
        bigquery.SchemaField("date", "DATE"),
    ]
    table = bigquery.Table(table_id, schema=schema)

    # Partition by day on the "date" column; partitions expire after
    # 7776000000 ms = 90 days.
    table.time_partitioning = bigquery.TimePartitioning(
        type_=bigquery.TimePartitioningType.DAY,
        field="date",
        expiration_ms=7776000000,
    )

    table = client.create_table(table)  # Make an API request.
    print(
        "Created table {}, partitioned on column {}".format(
            table.table_id, table.time_partitioning.field
        )
    )
    # [END bigquery_create_table_partitioned]
    return table
+ + +def create_view(load_table_id, table_id): + + # [START bigquery_create_view] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to load the data. + # load_table_id = "your-project.your_dataset.your_table_name" + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + skip_leading_rows=1, + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + + load_job = client.load_table_from_uri(uri, load_table_id, job_config=job_config) + load_job.result() + table = client.get_table(load_table_id) + view = bigquery.Table(table_id) + sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"' + view.view_query = sql_template.format( + view.project, table.dataset_id, table.table_id + ) + view = client.create_table(view) # Make an API request. + + print( + "Successfully created view at {}.{}.{}".format( + view.project, view.dataset_id, view.table_id + ) + ) + # [END bigquery_create_view] diff --git a/bigquery/samples/create_view_dll.py b/bigquery/samples/create_view_dll.py new file mode 100644 index 000000000000..f6407f613f40 --- /dev/null +++ b/bigquery/samples/create_view_dll.py @@ -0,0 +1,54 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_view_dll(table_id): + + # [START bigquery_ddl_create_view] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the view to create. + # table_id = "your-project.your_dataset.your_table_name" + + sql = """ + CREATE VIEW `{}` + OPTIONS( + expiration_timestamp=TIMESTAMP_ADD( + CURRENT_TIMESTAMP(), INTERVAL 48 HOUR), + friendly_name="new_view", + description="a view that expires in 2 days", + labels=[("org_unit", "development")] + ) + AS SELECT name, state, year, number + FROM `bigquery-public-data.usa_names.usa_1910_current` + WHERE state LIKE 'W%' + """.format( + table_id + ) + + job = client.query(sql) # Make an API request. + job.result() # Waits for the query to finish. + + print( + "Created new view {}.{}.{}.".format( + job.destination.project, + job.destination.dataset_id, + job.destination.table_id, + ) + ) + # [END bigquery_ddl_create_view] + return job diff --git a/bigquery/samples/extract_table.py b/bigquery/samples/extract_table.py index 5809ec31435e..8892c8ed2b5c 100644 --- a/bigquery/samples/extract_table.py +++ b/bigquery/samples/extract_table.py @@ -16,11 +16,11 @@ def extract_table(table_id): # [START bigquery_extract_table] + import time + from google.cloud import bigquery from google.cloud import storage - import time - # Construct a BigQuery client object. client = bigquery.Client() @@ -47,9 +47,9 @@ def extract_table(table_id): extract_job = client.extract_table( table, destination_uri, - # Location must match that of the source table. + # Must match the source table location. location="US", - ) # API request + ) # Make an API request. extract_job.result() # Waits for job to complete.
print( diff --git a/bigquery/samples/extract_table_compressed.py b/bigquery/samples/extract_table_compressed.py index d6bf21f54f65..54cd8621e678 100644 --- a/bigquery/samples/extract_table_compressed.py +++ b/bigquery/samples/extract_table_compressed.py @@ -16,11 +16,11 @@ def extract_table_compressed(table_id): # [START bigquery_extract_table_compressed] + import time + from google.cloud import bigquery from google.cloud import storage - import time - # Construct a BigQuery client object. client = bigquery.Client() @@ -42,9 +42,9 @@ def extract_table_compressed(table_id): table_id, destination_uri, job_config=job_config, - # Location must match that of the source table. + # Must match the source table location. location="US", - ) # API request + ) # Make an API request. extract_job.result() # Waits for job to complete. table = client.get_table(table_id) diff --git a/bigquery/samples/extract_table_json.py b/bigquery/samples/extract_table_json.py index 7d96f1fa2946..bc8f31bcc4de 100644 --- a/bigquery/samples/extract_table_json.py +++ b/bigquery/samples/extract_table_json.py @@ -16,11 +16,11 @@ def extract_table_json(table_id): # [START bigquery_extract_table_json] + import time + from google.cloud import bigquery from google.cloud import storage - import time - # Construct a BigQuery client object. client = bigquery.Client() @@ -42,9 +42,9 @@ def extract_table_json(table_id): table_id, destination_uri, job_config=job_config, - # Location must match that of the source table. + # Must match the source table location. location="US", - ) # API request + ) # Make an API request. extract_job.result() # Waits for job to complete. 
table = client.get_table(table_id) diff --git a/bigquery/samples/get_job.py b/bigquery/samples/get_job.py index 37ccf5741aaa..dcd8b32a76d6 100644 --- a/bigquery/samples/get_job.py +++ b/bigquery/samples/get_job.py @@ -30,7 +30,7 @@ def get_job(): job = client.query(sql, location=location) job_id = job.job_id - job = client.get_job(job_id, location=location) # API request + job = client.get_job(job_id, location=location) # Make an API request. print("Details for job {} running in {}:".format(job_id, location)) print( diff --git a/bigquery/samples/get_view.py b/bigquery/samples/get_view.py new file mode 100644 index 000000000000..96535b094780 --- /dev/null +++ b/bigquery/samples/get_view.py @@ -0,0 +1,51 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def get_view(load_table_id, table_id): + + # [START bigquery_get_view] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to load the data. + # load_table_id = "your-project.your_dataset.your_table_name" + + # TODO(developer): Set table_id to the ID of the table to create. 
+ # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + skip_leading_rows=1, + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + + load_job = client.load_table_from_uri(uri, load_table_id, job_config=job_config) + load_job.result() + table = client.get_table(load_table_id) + view = bigquery.Table(table_id) + sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"' + view.view_query = sql_template.format( + view.project, table.dataset_id, table.table_id + ) + view = client.create_table(view) # Make an API request. + + print("View at {}".format(view.full_table_id)) + print("View Query:\n{}".format(view.view_query)) + # [END bigquery_get_view] diff --git a/bigquery/samples/grant_view_access.py b/bigquery/samples/grant_view_access.py new file mode 100644 index 000000000000..879b36093632 --- /dev/null +++ b/bigquery/samples/grant_view_access.py @@ -0,0 +1,75 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def grant_view_access(load_table_id, dataset_id): + + # [START bigquery_grant_view_access] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to load the data.
+ # load_table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + skip_leading_rows=1, + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + + load_job = client.load_table_from_uri(uri, load_table_id, job_config=job_config) + load_job.result() + table = client.get_table(load_table_id) + analyst_group_email = "example-analyst-group@google.com" + + # Assign access controls to the dataset + shared_dataset = client.get_dataset(client.dataset(table.dataset_id)) # API request + + access_entries = shared_dataset.access_entries + access_entries.append( + bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email) + ) + shared_dataset.access_entries = access_entries + shared_dataset = client.update_dataset( + shared_dataset, ["access_entries"] + ) # Make an API request. + if shared_dataset.access_entries: + print("Assign access controls to the dataset successfully.") + + # Authorize the view to access the source dataset + + # TODO(developer): Set dataset_id to the ID of the dataset to create. + # dataset_id = "{}.your_dataset".format(client.project) + source_dataset = client.create_dataset(dataset_id) # Make an API request. + + view_reference = { + "projectId": client.project, + "datasetId": table.dataset_id, + "tableId": table.table_id, + } + access_entries = source_dataset.access_entries + access_entries.append(bigquery.AccessEntry(None, "view", view_reference)) + source_dataset.access_entries = access_entries + source_dataset = client.update_dataset( + source_dataset, ["access_entries"] + ) # Make an API request. 
+ for access_entry in source_dataset.access_entries: + if access_entry.entity_type == "view": + print("Grant view access successfully.") + # [END bigquery_grant_view_access] diff --git a/bigquery/samples/list_rows_as_dataframe.py b/bigquery/samples/list_rows_as_dataframe.py index 97262c21c7b9..1d8b4df9474c 100644 --- a/bigquery/samples/list_rows_as_dataframe.py +++ b/bigquery/samples/list_rows_as_dataframe.py @@ -16,11 +16,10 @@ def list_rows_as_dataframe(table_id): # [START bigquery_list_rows_dataframe] + import pandas from google.cloud import bigquery - import pandas - # Construct a BigQuery client object. client = bigquery.Client() diff --git a/bigquery/samples/load_table_add_column.py b/bigquery/samples/load_table_add_column.py index da9b0640356a..f9d800176ab3 100644 --- a/bigquery/samples/load_table_add_column.py +++ b/bigquery/samples/load_table_add_column.py @@ -53,9 +53,9 @@ def load_table_add_column(file_path, table_id): table_id, location="US", # Must match the destination dataset location. job_config=job_config, - ) # API request + ) # Make an API request. - job.result() # Waits for table load to complete. + job.result() # Waits for the job to complete. print( "Loaded {} rows into {}:{}.".format( job.output_rows, table.dataset_id, table.table_id diff --git a/bigquery/samples/load_table_dataframe.py b/bigquery/samples/load_table_dataframe.py index feaa4550bac9..a00e252ea562 100644 --- a/bigquery/samples/load_table_dataframe.py +++ b/bigquery/samples/load_table_dataframe.py @@ -16,10 +16,10 @@ def load_table_dataframe(table_id): # [START bigquery_load_table_dataframe] - from google.cloud import bigquery - import pandas + from google.cloud import bigquery + # Construct a BigQuery client object. 
client = bigquery.Client() diff --git a/bigquery/samples/load_table_file.py b/bigquery/samples/load_table_file.py index f5a0955909f9..63aa343f5cf7 100644 --- a/bigquery/samples/load_table_file.py +++ b/bigquery/samples/load_table_file.py @@ -15,7 +15,7 @@ def load_table_file(file_path, table_id): - # [START bigquery_load_table_file] + # [START bigquery_load_from_file] from google.cloud import bigquery # Construct a BigQuery client object. @@ -31,7 +31,7 @@ def load_table_file(file_path, table_id): with open(file_path, "rb") as source_file: job = client.load_table_from_file(source_file, table_id, job_config=job_config) - job.result() # Waits for table load to complete. + job.result() # Waits for the job to complete. table = client.get_table(table_id) # Make an API request. print( @@ -39,5 +39,5 @@ def load_table_file(file_path, table_id): table.num_rows, len(table.schema), table_id ) ) - # [END bigquery_load_table_file] + # [END bigquery_load_from_file] return table diff --git a/bigquery/samples/load_table_relax_column.py b/bigquery/samples/load_table_relax_column.py index a7bc94e522c5..1d4a9c8696b1 100644 --- a/bigquery/samples/load_table_relax_column.py +++ b/bigquery/samples/load_table_relax_column.py @@ -58,9 +58,9 @@ def load_table_relax_column(file_path, table_id): table_id, location="US", # Must match the destination dataset location. job_config=job_config, - ) # API request + ) # Make an API request. - job.result() # Waits for table load to complete. + job.result() # Waits for the job to complete. 
print( "Loaded {} rows into {}:{}.".format( job.output_rows, table.dataset_id, table.table_id diff --git a/bigquery/samples/load_table_uri_autodetect_csv.py b/bigquery/samples/load_table_uri_autodetect_csv.py new file mode 100644 index 000000000000..fce37d0ed18d --- /dev/null +++ b/bigquery/samples/load_table_uri_autodetect_csv.py @@ -0,0 +1,45 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_autodetect_csv(table_id): + + # [START bigquery_load_table_gcs_csv_autodetect] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + # Set the encryption key to use for the destination. + # TODO: Replace this key with a key you have created in KMS. + # kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + # "cloud-samples-tests", "us", "test", "test" + # ) + job_config = bigquery.LoadJobConfig( + autodetect=True, + skip_leading_rows=1, + # The source format defaults to CSV, so the line below is optional. + source_format=bigquery.SourceFormat.CSV, + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + load_job.result() # Waits for the job to complete. 
+ destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_csv_autodetect] diff --git a/bigquery/samples/load_table_uri_autodetect_json.py b/bigquery/samples/load_table_uri_autodetect_json.py new file mode 100644 index 000000000000..65fd64c311a4 --- /dev/null +++ b/bigquery/samples/load_table_uri_autodetect_json.py @@ -0,0 +1,42 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_autodetect_json(table_id): + + # [START bigquery_load_table_gcs_json_autodetect] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + # Set the encryption key to use for the destination. + # TODO: Replace this key with a key you have created in KMS. + # kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + # "cloud-samples-tests", "us", "test", "test" + # ) + job_config = bigquery.LoadJobConfig( + autodetect=True, source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + load_job.result() # Waits for the job to complete. 
+ destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_json_autodetect] diff --git a/bigquery/samples/load_table_uri_avro.py b/bigquery/samples/load_table_uri_avro.py index 4195815b3797..8cfdf1220fd9 100644 --- a/bigquery/samples/load_table_uri_avro.py +++ b/bigquery/samples/load_table_uri_avro.py @@ -29,9 +29,9 @@ def load_table_uri_cmek(table_id): load_job = client.load_table_from_uri( uri, table_id, job_config=job_config - ) # API request + ) # Make an API request. - load_job.result() # Waits for table load to complete. + load_job.result() # Waits for the job to complete. destination_table = client.get_table(table_id) print("Loaded {} rows.".format(destination_table.num_rows)) diff --git a/bigquery/samples/load_table_uri_cmek.py b/bigquery/samples/load_table_uri_cmek.py index 9114dd39cfce..527c9aed7c52 100644 --- a/bigquery/samples/load_table_uri_cmek.py +++ b/bigquery/samples/load_table_uri_cmek.py @@ -43,13 +43,13 @@ def load_table_uri_cmek(table_id, kms_key_name): load_job = client.load_table_from_uri( uri, table_id, - location="US", # Location must match that of the destination dataset. + location="US", # Must match the destination dataset location. job_config=job_config, - ) # API request + ) # Make an API request. assert load_job.job_type == "load" - load_job.result() # Waits for table load to complete. + load_job.result() # Waits for the job to complete. assert load_job.state == "DONE" table = client.get_table(table_id) diff --git a/bigquery/samples/load_table_uri_csv.py b/bigquery/samples/load_table_uri_csv.py index 75363dd26345..9da0d98a3e52 100644 --- a/bigquery/samples/load_table_uri_csv.py +++ b/bigquery/samples/load_table_uri_csv.py @@ -37,9 +37,9 @@ def load_table_uri_csv(table_id): load_job = client.load_table_from_uri( uri, table_id, job_config=job_config - ) # API request + ) # Make an API request. - load_job.result() # Waits for table load to complete. 
+ load_job.result() # Waits for the job to complete. destination_table = client.get_table(table_id) # Make an API request. print("Loaded {} rows.".format(destination_table.num_rows)) diff --git a/bigquery/samples/load_table_uri_json.py b/bigquery/samples/load_table_uri_json.py index f14085dc447b..47214fef8331 100644 --- a/bigquery/samples/load_table_uri_json.py +++ b/bigquery/samples/load_table_uri_json.py @@ -35,11 +35,11 @@ def load_table_uri_json(table_id): load_job = client.load_table_from_uri( uri, table_id, - location="US", # Location must match that of the destination dataset. + location="US", # Must match the destination dataset location. job_config=job_config, - ) # API request + ) # Make an API request. - load_job.result() # Waits for table load to complete. + load_job.result() # Waits for the job to complete. destination_table = client.get_table(table_id) print("Loaded {} rows.".format(destination_table.num_rows)) diff --git a/bigquery/samples/load_table_uri_orc.py b/bigquery/samples/load_table_uri_orc.py index 118981055c06..94ee61133242 100644 --- a/bigquery/samples/load_table_uri_orc.py +++ b/bigquery/samples/load_table_uri_orc.py @@ -29,9 +29,9 @@ def load_table_uri_orc(table_id): load_job = client.load_table_from_uri( uri, table_id, job_config=job_config - ) # API request + ) # Make an API request. - load_job.result() # Waits for table load to complete. + load_job.result() # Waits for the job to complete. destination_table = client.get_table(table_id) print("Loaded {} rows.".format(destination_table.num_rows)) diff --git a/bigquery/samples/load_table_uri_parquet.py b/bigquery/samples/load_table_uri_parquet.py index afe846f69c3e..971ba7f11456 100644 --- a/bigquery/samples/load_table_uri_parquet.py +++ b/bigquery/samples/load_table_uri_parquet.py @@ -28,9 +28,9 @@ def load_table_uri_parquet(table_id): load_job = client.load_table_from_uri( uri, table_id, job_config=job_config - ) # API request + ) # Make an API request. 
- load_job.result() # Waits for table load to complete. + load_job.result() # Waits for the job to complete. destination_table = client.get_table(table_id) print("Loaded {} rows.".format(destination_table.num_rows)) diff --git a/bigquery/samples/load_table_uri_truncate_avro.py b/bigquery/samples/load_table_uri_truncate_avro.py new file mode 100644 index 000000000000..22a82286c585 --- /dev/null +++ b/bigquery/samples/load_table_uri_truncate_avro.py @@ -0,0 +1,55 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_truncate_avro(table_id): + + # [START bigquery_load_table_gcs_avro_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. 
+ # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.AVRO, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_avro_truncate] diff --git a/bigquery/samples/load_table_uri_truncate_csv.py b/bigquery/samples/load_table_uri_truncate_csv.py new file mode 100644 index 000000000000..3d2ff574f59f --- /dev/null +++ b/bigquery/samples/load_table_uri_truncate_csv.py @@ -0,0 +1,56 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_truncate_csv(table_id): + + # [START bigquery_load_table_gcs_csv_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. 
+ client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.CSV, + skip_leading_rows=1, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_csv_truncate] diff --git a/bigquery/samples/load_table_uri_truncate_json.py b/bigquery/samples/load_table_uri_truncate_json.py new file mode 100644 index 000000000000..cc7d70567544 --- /dev/null +++ b/bigquery/samples/load_table_uri_truncate_json.py @@ -0,0 +1,55 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def load_table_uri_truncate_json(table_id): + + # [START bigquery_load_table_gcs_json_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_json_truncate] diff --git a/bigquery/samples/load_table_uri_truncate_orc.py b/bigquery/samples/load_table_uri_truncate_orc.py new file mode 100644 index 000000000000..bbf696bbcfdb --- /dev/null +++ b/bigquery/samples/load_table_uri_truncate_orc.py @@ -0,0 +1,55 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_truncate_orc(table_id): + + # [START bigquery_load_table_gcs_orc_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.ORC, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_orc_truncate] diff --git a/bigquery/samples/load_table_uri_truncate_parquet.py b/bigquery/samples/load_table_uri_truncate_parquet.py new file mode 100644 index 000000000000..2fe67fdd6623 --- /dev/null +++ b/bigquery/samples/load_table_uri_truncate_parquet.py @@ -0,0 +1,55 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_truncate_parquet(table_id): + + # [START bigquery_load_table_gcs_parquet_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.PARQUET, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete.
+ + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_parquet_truncate] diff --git a/bigquery/samples/query_external_gcs_permanent_table.py b/bigquery/samples/query_external_gcs_permanent_table.py index 2db90307ce52..08c43197e6a5 100644 --- a/bigquery/samples/query_external_gcs_permanent_table.py +++ b/bigquery/samples/query_external_gcs_permanent_table.py @@ -43,13 +43,13 @@ def query_external_gcs_permanent_table(table_id): table.external_data_configuration = external_config # Create a permanent table linked to the GCS file - table = client.create_table(table) # API request + table = client.create_table(table) # Make an API request. # Example query to find states starting with 'W' sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format( table.dataset_id, table.table_id ) - query_job = client.query(sql) # API request + query_job = client.query(sql) # Make an API request. w_states = list(query_job) # Waits for query to finish print("There are {} states with names starting with W.".format(len(w_states))) diff --git a/bigquery/samples/relax_column.py b/bigquery/samples/relax_column.py new file mode 100644 index 000000000000..6a6d2b0985c2 --- /dev/null +++ b/bigquery/samples/relax_column.py @@ -0,0 +1,49 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def relax_column(table_id): + + # [START bigquery_relax_column] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + original_schema = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + table = bigquery.Table(table_id, schema=original_schema) + table = client.create_table(table) + # Checks the number of required fields + original_required_fields = sum(field.mode == "REQUIRED" for field in table.schema) + # In this example, the existing table has 2 required fields. + print("{} fields in the schema are required.".format(original_required_fields)) + + # SchemaField properties cannot be edited after initialization. + # To make changes, construct new SchemaField objects. + relaxed_schema = [ + bigquery.SchemaField("full_name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"), + ] + table.schema = relaxed_schema + table = client.update_table(table, ["schema"]) + relaxed_nullable_fields = sum(field.mode == "NULLABLE" for field in table.schema) + # In this example, the existing table has 2 nullable fields. + print("{} fields in the schema are now nullable.".format(relaxed_nullable_fields)) + # [END bigquery_relax_column] diff --git a/bigquery/samples/tests/test_create_partitioned_table.py b/bigquery/samples/tests/test_create_partitioned_table.py new file mode 100644 index 000000000000..807fc4b027ab --- /dev/null +++ b/bigquery/samples/tests/test_create_partitioned_table.py @@ -0,0 +1,24 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import create_partitioned_table + + +def test_create_partitioned_table(capsys, random_table_id): + table = create_partitioned_table.create_partitioned_table(random_table_id) + out, _ = capsys.readouterr() + assert "Created table {}, partitioned on column date".format(table.table_id) in out + assert table.time_partitioning.type_ == "DAY" + assert table.time_partitioning.field == "date" + assert table.time_partitioning.expiration_ms == 7776000000 diff --git a/bigquery/samples/tests/test_create_view.py b/bigquery/samples/tests/test_create_view.py new file mode 100644 index 000000000000..ad4b2c17ecb7 --- /dev/null +++ b/bigquery/samples/tests/test_create_view.py @@ -0,0 +1,21 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ..
import create_view + + +def test_create_view(capsys, table_id, random_table_id): + create_view.create_view(table_id, random_table_id) + out, err = capsys.readouterr() + assert "Successfully created view at {}".format(random_table_id) in out diff --git a/bigquery/samples/tests/test_create_view_dll.py b/bigquery/samples/tests/test_create_view_dll.py new file mode 100644 index 000000000000..7dd82a03f2be --- /dev/null +++ b/bigquery/samples/tests/test_create_view_dll.py @@ -0,0 +1,26 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import create_view_dll + + +def test_create_view_dll(capsys, random_table_id): + job = create_view_dll.create_view_dll(random_table_id) + out, _ = capsys.readouterr() + assert "Created new view {}.".format(random_table_id) in out + + # Test that listing query result rows succeeds so that generic query + # processing tools work with DDL statements. + rows = list(job) + assert len(rows) == 0 diff --git a/bigquery/samples/tests/test_get_view.py b/bigquery/samples/tests/test_get_view.py new file mode 100644 index 000000000000..d70dc7501294 --- /dev/null +++ b/bigquery/samples/tests/test_get_view.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import get_view + + +def test_get_view(capsys, table_id, random_table_id): + get_view.get_view(table_id, random_table_id) + out, err = capsys.readouterr() + assert "View at" in out + assert "View Query" in out diff --git a/bigquery/samples/tests/test_grant_view_access.py b/bigquery/samples/tests/test_grant_view_access.py new file mode 100644 index 000000000000..4ae141cb95e5 --- /dev/null +++ b/bigquery/samples/tests/test_grant_view_access.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import grant_view_access + + +def test_grant_view_access(capsys, table_id, random_dataset_id): + grant_view_access.grant_view_access(table_id, random_dataset_id) + out, err = capsys.readouterr() + assert "Assign access controls to the dataset successfully." 
in out + assert "Grant view access successfully" in out diff --git a/bigquery/samples/tests/test_load_table_file.py b/bigquery/samples/tests/test_load_table_file.py index 979f640a8a14..c4b683533402 100644 --- a/bigquery/samples/tests/test_load_table_file.py +++ b/bigquery/samples/tests/test_load_table_file.py @@ -29,7 +29,7 @@ def test_load_table_file(capsys, random_table_id, client): out, _ = capsys.readouterr() assert "Loaded 2 rows and 2 columns" in out - rows = list(client.list_rows(table)) # API request + rows = list(client.list_rows(table)) # Make an API request. assert len(rows) == 2 # Order is not preserved, so compare individually row1 = bigquery.Row(("Wylma Phlyntstone", 29), {"full_name": 0, "age": 1}) diff --git a/bigquery/samples/tests/test_load_table_relax_column.py b/bigquery/samples/tests/test_load_table_relax_column.py index 277e2b92250b..387ec4fb19ab 100644 --- a/bigquery/samples/tests/test_load_table_relax_column.py +++ b/bigquery/samples/tests/test_load_table_relax_column.py @@ -1,3 +1,17 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from .. 
import load_table_relax_column diff --git a/bigquery/samples/tests/test_load_table_uri_autodetect_csv.py b/bigquery/samples/tests/test_load_table_uri_autodetect_csv.py new file mode 100644 index 000000000000..7233bd1dd4c2 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_autodetect_csv.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_autodetect_csv + + +def test_load_table_uri_autodetect_csv(capsys, random_table_id): + + load_table_uri_autodetect_csv.load_table_uri_autodetect_csv(random_table_id) + out, err = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_load_table_uri_autodetect_json.py b/bigquery/samples/tests/test_load_table_uri_autodetect_json.py new file mode 100644 index 000000000000..f9227f4edbb9 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_autodetect_json.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_autodetect_json + + +def test_load_table_uri_autodetect_json(capsys, random_table_id): + + load_table_uri_autodetect_json.load_table_uri_autodetect_json(random_table_id) + out, err = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_avro.py b/bigquery/samples/tests/test_load_table_uri_truncate_avro.py new file mode 100644 index 000000000000..eb5e65cd07e1 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_truncate_avro.py @@ -0,0 +1,21 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_avro + + +def test_load_table_uri_truncate_avro(capsys, random_table_id): + load_table_uri_truncate_avro.load_table_uri_truncate_avro(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_csv.py b/bigquery/samples/tests/test_load_table_uri_truncate_csv.py new file mode 100644 index 000000000000..0832797f7130 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_truncate_csv.py @@ -0,0 +1,21 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_csv + + +def test_load_table_uri_truncate_csv(capsys, random_table_id): + load_table_uri_truncate_csv.load_table_uri_truncate_csv(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_json.py b/bigquery/samples/tests/test_load_table_uri_truncate_json.py new file mode 100644 index 000000000000..35d42aae91c2 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_truncate_json.py @@ -0,0 +1,21 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_json + + +def test_load_table_uri_truncate_json(capsys, random_table_id): + load_table_uri_truncate_json.load_table_uri_truncate_json(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." 
in out diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_orc.py b/bigquery/samples/tests/test_load_table_uri_truncate_orc.py new file mode 100644 index 000000000000..7a61516a12cc --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_truncate_orc.py @@ -0,0 +1,21 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_orc + + +def test_load_table_uri_truncate_orc(capsys, random_table_id): + load_table_uri_truncate_orc.load_table_uri_truncate_orc(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_parquet.py b/bigquery/samples/tests/test_load_table_uri_truncate_parquet.py new file mode 100644 index 000000000000..2a9c33c322bc --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_truncate_parquet.py @@ -0,0 +1,21 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_parquet + + +def test_load_table_uri_truncate_parquet(capsys, random_table_id): + load_table_uri_truncate_parquet.load_table_uri_truncate_parquet(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_relax_column.py b/bigquery/samples/tests/test_relax_column.py new file mode 100644 index 000000000000..56801278e74d --- /dev/null +++ b/bigquery/samples/tests/test_relax_column.py @@ -0,0 +1,23 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import relax_column + + +def test_relax_column(capsys, random_table_id): + + relax_column.relax_column(random_table_id) + out, _ = capsys.readouterr() + assert "2 fields in the schema are required." in out + assert "2 fields in the schema are now nullable." in out diff --git a/bigquery/samples/tests/test_update_table_expiration.py b/bigquery/samples/tests/test_update_table_expiration.py new file mode 100644 index 000000000000..36188c194806 --- /dev/null +++ b/bigquery/samples/tests/test_update_table_expiration.py @@ -0,0 +1,28 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +from .. import update_table_expiration + + +def test_update_table_expiration(capsys, random_table_id): + + table, expiration = update_table_expiration.update_table_expiration(random_table_id) + out, _ = capsys.readouterr() + assert "Updated table {} with new expiration".format(table.table_id) in out + + # expiration is stored in milliseconds + margin = datetime.timedelta(microseconds=1000) + assert expiration - margin <= table.expires <= expiration + margin diff --git a/bigquery/samples/tests/test_update_view.py b/bigquery/samples/tests/test_update_view.py new file mode 100644 index 000000000000..d29eb464e9be --- /dev/null +++ b/bigquery/samples/tests/test_update_view.py @@ -0,0 +1,21 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import update_view + + +def test_update_view(capsys, table_id, random_table_id): + update_view.update_view(table_id, random_table_id) + out, err = capsys.readouterr() + assert "The View query has been updated." 
in out diff --git a/bigquery/samples/update_table_cmek.py b/bigquery/samples/update_table_cmek.py index de6b41c97719..6071753e6138 100644 --- a/bigquery/samples/update_table_cmek.py +++ b/bigquery/samples/update_table_cmek.py @@ -35,12 +35,14 @@ def update_table_cmek(table_id, old_kms_key_name, new_kms_key_name): kms_key_name=old_kms_key_name ) - table = client.create_table(table) # API request + table = client.create_table(table) # Make an API request. table.encryption_configuration = bigquery.EncryptionConfiguration( kms_key_name=new_kms_key_name ) - table = client.update_table(table, ["encryption_configuration"]) # API request + table = client.update_table( + table, ["encryption_configuration"] + ) # Make an API request. if table.encryption_configuration.kms_key_name == new_kms_key_name: print("A table updated with encryption configuration key") diff --git a/bigquery/samples/update_table_expiration.py b/bigquery/samples/update_table_expiration.py new file mode 100644 index 000000000000..4d6c88165f89 --- /dev/null +++ b/bigquery/samples/update_table_expiration.py @@ -0,0 +1,49 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def update_table_expiration(table_id): + + # [START bigquery_update_table_expiration] + from google.cloud import bigquery + + import datetime + import pytz + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the model to fetch. 
+ # table_id = 'your-project.your_dataset.your_table' + + table = bigquery.Table( + table_id, + schema=[ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ], + ) + table = client.create_table(table) + assert table.expires is None + + # set table to expire 5 days from now + expiration = datetime.datetime.now(pytz.utc) + datetime.timedelta(days=5) + table.expires = expiration + table = client.update_table(table, ["expires"]) # Make an API request. + + print( + "Updated table {} with new expiration {}".format(table.table_id, table.expires) + ) + # [END bigquery_update_table_expiration] + return table, expiration diff --git a/bigquery/samples/update_view.py b/bigquery/samples/update_view.py new file mode 100644 index 000000000000..c9e01608eaee --- /dev/null +++ b/bigquery/samples/update_view.py @@ -0,0 +1,59 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def update_view(load_table_id, table_id): + + # [START bigquery_update_view_query] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to load the data. + # load_table_id = "your-project.your_dataset.your_table_name" + + # TODO(developer): Set table_id to the ID of the table to create. 
+ # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + skip_leading_rows=1, + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + + load_job = client.load_table_from_uri(uri, load_table_id, job_config=job_config) + load_job.result() + table = client.get_table(load_table_id) + view = bigquery.Table(table_id) + sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"' + view.view_query = sql_template.format( + view.project, table.dataset_id, table.table_id + ) + view = client.create_table(view) # Make an API request. + old_view_query = view.view_query + + sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "M%"' + view.view_query = sql_template.format( + view.project, table.dataset_id, table.table_id + ) + view = client.update_table(view, ["view_query"]) # API request + new_view_query = view.view_query + + if old_view_query != new_view_query: + print("The View query has been updated.") + # [END bigquery_update_view_query] From 3022d3870b3c89154ad5289d04cbb11814d924d7 Mon Sep 17 00:00:00 2001 From: HemangChothani Date: Mon, 20 Jan 2020 18:07:21 +0530 Subject: [PATCH 3/3] refactor(bigquery): update code samples of table label view --- bigquery/docs/snippets.py | 110 ------------------ .../create_table_nested_repeated_schema.py | 53 +++++++++ bigquery/samples/delete_table_labels.py | 36 ++++++ bigquery/samples/get_table_labels.py | 38 ++++++ bigquery/samples/get_view.py | 22 +--- bigquery/samples/grant_view_access.py | 21 +--- bigquery/samples/label_table.py | 35 ++++++ ...st_create_table_nested_repeated_schema.py} | 12 +- .../samples/tests/test_load_table_file.py | 1 + ...st_create_view.py => test_manage_views.py} | 19 ++- ...et_view.py => test_table_label_samples.py} | 21 +++- bigquery/samples/tests/test_update_view.py | 21 ---- bigquery/samples/update_view.py
| 23 +--- 13 files changed, 215 insertions(+), 197 deletions(-) create mode 100644 bigquery/samples/create_table_nested_repeated_schema.py create mode 100644 bigquery/samples/delete_table_labels.py create mode 100644 bigquery/samples/get_table_labels.py create mode 100644 bigquery/samples/label_table.py rename bigquery/samples/tests/{test_grant_view_access.py => test_create_table_nested_repeated_schema.py} (67%) rename bigquery/samples/tests/{test_create_view.py => test_manage_views.py} (53%) rename bigquery/samples/tests/{test_get_view.py => test_table_label_samples.py} (52%) delete mode 100644 bigquery/samples/tests/test_update_view.py diff --git a/bigquery/docs/snippets.py b/bigquery/docs/snippets.py index 0812df21b42f..744c36534fb7 100644 --- a/bigquery/docs/snippets.py +++ b/bigquery/docs/snippets.py @@ -21,11 +21,9 @@ need to be deleted during teardown. """ -import os import time import pytest -import six try: import fastparquet @@ -122,113 +120,5 @@ def test_create_client_default_credentials(): assert client is not None -def test_create_table_nested_repeated_schema(client, to_delete): - dataset_id = "create_table_nested_repeated_{}".format(_millis()) - dataset_ref = client.dataset(dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_nested_repeated_schema] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_ref = client.dataset('my_dataset') - - schema = [ - bigquery.SchemaField("id", "STRING", mode="NULLABLE"), - bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"), - bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"), - bigquery.SchemaField("dob", "DATE", mode="NULLABLE"), - bigquery.SchemaField( - "addresses", - "RECORD", - mode="REPEATED", - fields=[ - bigquery.SchemaField("status", "STRING", mode="NULLABLE"), - bigquery.SchemaField("address", "STRING", mode="NULLABLE"), - bigquery.SchemaField("city", "STRING", 
mode="NULLABLE"), - bigquery.SchemaField("state", "STRING", mode="NULLABLE"), - bigquery.SchemaField("zip", "STRING", mode="NULLABLE"), - bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"), - ], - ), - ] - table_ref = dataset_ref.table("my_table") - table = bigquery.Table(table_ref, schema=schema) - table = client.create_table(table) # API request - - print("Created table {}".format(table.full_table_id)) - # [END bigquery_nested_repeated_schema] - - -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_manage_table_labels(client, to_delete): - dataset_id = "label_table_dataset_{}".format(_millis()) - table_id = "label_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - table = client.create_table(table) - - # [START bigquery_label_table] - # from google.cloud import bigquery - # client = bigquery.Client() - # table_ref = client.dataset('my_dataset').table('my_table') - # table = client.get_table(table_ref) # API request - - assert table.labels == {} - labels = {"color": "green"} - table.labels = labels - - table = client.update_table(table, ["labels"]) # API request - - assert table.labels == labels - # [END bigquery_label_table] - - # [START bigquery_get_table_labels] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - # table_id = 'my_table' - - dataset_ref = client.dataset(dataset_id) - table_ref = dataset_ref.table(table_id) - table = client.get_table(table_ref) # API Request - - # View table labels - print("Table ID: {}".format(table_id)) - print("Labels:") - if table.labels: - for label, value in table.labels.items(): - print("\t{}: {}".format(label, value)) - else: - print("\tTable has no labels defined.") - # [END 
bigquery_get_table_labels] - assert table.labels == labels - - # [START bigquery_delete_label_table] - # from google.cloud import bigquery - # client = bigquery.Client() - # table_ref = client.dataset('my_dataset').table('my_table') - # table = client.get_table(table_ref) # API request - - # This example table starts with one label - assert table.labels == {"color": "green"} - # To delete a label from a table, set its value to None - table.labels["color"] = None - - table = client.update_table(table, ["labels"]) # API request - - assert table.labels == {} - # [END bigquery_delete_label_table] - - if __name__ == "__main__": pytest.main() diff --git a/bigquery/samples/create_table_nested_repeated_schema.py b/bigquery/samples/create_table_nested_repeated_schema.py new file mode 100644 index 000000000000..ef4a2f1db60d --- /dev/null +++ b/bigquery/samples/create_table_nested_repeated_schema.py @@ -0,0 +1,53 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_table_nested_repeated_schema(table_id): + + # [START bigquery_nested_repeated_schema] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. 
+ # table_id = "your-project.your_dataset.your_table_name" + + schema = [ + bigquery.SchemaField("id", "STRING", mode="NULLABLE"), + bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("dob", "DATE", mode="NULLABLE"), + bigquery.SchemaField( + "addresses", + "RECORD", + mode="REPEATED", + fields=[ + bigquery.SchemaField("status", "STRING", mode="NULLABLE"), + bigquery.SchemaField("address", "STRING", mode="NULLABLE"), + bigquery.SchemaField("city", "STRING", mode="NULLABLE"), + bigquery.SchemaField("state", "STRING", mode="NULLABLE"), + bigquery.SchemaField("zip", "STRING", mode="NULLABLE"), + bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"), + ], + ), + ] + + table = bigquery.Table(table_id, schema=schema) + table = client.create_table(table) # Make an API request. + + print( + "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id) + ) + # [END bigquery_nested_repeated_schema] diff --git a/bigquery/samples/delete_table_labels.py b/bigquery/samples/delete_table_labels.py new file mode 100644 index 000000000000..7464078c047b --- /dev/null +++ b/bigquery/samples/delete_table_labels.py @@ -0,0 +1,36 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def delete_table_labels(table_id): + + # [START bigquery_delete_label_table] + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to fetch. + # table_id = "your-project.your_dataset.your_table_name" + + table = client.get_table(table_id) # Make an API request. + + # To delete a label from a table, set its value to None. + table.labels["color"] = None + + table = client.update_table(table, ["labels"]) # Make an API request. + print("Labels deleted from {}".format(table_id)) + # [END bigquery_delete_label_table] + return table diff --git a/bigquery/samples/get_table_labels.py b/bigquery/samples/get_table_labels.py new file mode 100644 index 000000000000..caa93e9deadc --- /dev/null +++ b/bigquery/samples/get_table_labels.py @@ -0,0 +1,38 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def get_table_labels(table_id): + + # [START bigquery_get_table_labels] + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to fetch. + # table_id = "your-project.your_dataset.your_table_name" + + table = client.get_table(table_id) # Make an API request. + + # View table labels. 
 + print("Table ID: {}".format(table_id)) + print("Labels:") + if table.labels: + for label, value in table.labels.items(): + print("\t{}: {}".format(label, value)) + else: + print("\tTable has no labels defined.") + # [END bigquery_get_table_labels] diff --git a/bigquery/samples/get_view.py b/bigquery/samples/get_view.py index 96535b094780..fef97fdae2f5 --- a/bigquery/samples/get_view.py +++ b/bigquery/samples/get_view.py @@ -13,7 +13,7 @@ # limitations under the License. -def get_view(load_table_id, table_id): +def get_view(table_id): # [START bigquery_get_view] from google.cloud import bigquery @@ -27,25 +27,9 @@ def get_view(load_table_id, table_id): # TODO(developer): Set table_id to the ID of the table to create. # table_id = "your-project.your_dataset.your_table_name" - job_config = bigquery.LoadJobConfig( - schema=[ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ], - skip_leading_rows=1, - ) - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - - load_job = client.load_table_from_uri(uri, load_table_id, job_config=job_config) - load_job.result() - table = client.get_table(load_table_id) - view = bigquery.Table(table_id) - sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"' - view.view_query = sql_template.format( - view.project, table.dataset_id, table.table_id - ) - view = client.create_table(view) # Make an API request. + view = client.get_table(table_id) # Make an API request. + # Display view properties print("View at {}".format(view.full_table_id)) print("View Query:\n{}".format(view.view_query)) # [END bigquery_get_view] diff --git a/bigquery/samples/grant_view_access.py b/bigquery/samples/grant_view_access.py index 879b36093632..6bd42a8c3be5 --- a/bigquery/samples/grant_view_access.py +++ b/bigquery/samples/grant_view_access.py @@ -13,7 +13,7 @@ # limitations under the License. 
-def grant_view_access(load_table_id, dataset_id): +def grant_view_access(table_id, dataset_id): # [START bigquery_update_view_query] from google.cloud import bigquery @@ -21,21 +21,13 @@ def grant_view_access(load_table_id, dataset_id): # Construct a BigQuery client object. client = bigquery.Client() - # TODO(developer): Set table_id to the ID of the table to load the data. + # TODO(developer): Set table_id to the ID of the table. # load_table_id = "your-project.your_dataset.your_table_name" - job_config = bigquery.LoadJobConfig( - schema=[ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ], - skip_leading_rows=1, - ) - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + # TODO(developer): Set dataset_id to the ID of the dataset to create. + # dataset_id = "{}.your_dataset".format(client.project) - load_job = client.load_table_from_uri(uri, load_table_id, job_config=job_config) - load_job.result() - table = client.get_table(load_table_id) + table = client.get_table(table_id) # Make an API request. analyst_group_email = "example-analyst-group@google.com" # Assign access controls to the dataset @@ -53,9 +45,6 @@ def grant_view_access(load_table_id, dataset_id): print("Assign access controls to the dataset successfully.") # Authorize the view to access the source dataset - - # TODO(developer): Set dataset_id to the ID of the dataset to create. - # dataset_id = "{}.your_dataset".format(client.project) source_dataset = client.create_dataset(dataset_id) # Make an API request. view_reference = { diff --git a/bigquery/samples/label_table.py b/bigquery/samples/label_table.py new file mode 100644 index 000000000000..5a179ef46f39 --- /dev/null +++ b/bigquery/samples/label_table.py @@ -0,0 +1,35 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def label_table(table_id): + + # [START bigquery_label_table] + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + table = client.get_table(table_id) # Make an API request. + assert table.labels == {} + labels = {"color": "green"} + table.labels = labels + + table = client.update_table(table, ["labels"]) # API request + print("Labels added to {}".format(table_id)) + # [END bigquery_label_table] diff --git a/bigquery/samples/tests/test_grant_view_access.py b/bigquery/samples/tests/test_create_table_nested_repeated_schema.py similarity index 67% rename from bigquery/samples/tests/test_grant_view_access.py rename to bigquery/samples/tests/test_create_table_nested_repeated_schema.py index 4ae141cb95e5..e710ecafcff0 100644 --- a/bigquery/samples/tests/test_grant_view_access.py +++ b/bigquery/samples/tests/test_create_table_nested_repeated_schema.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .. import grant_view_access +from .. 
import create_table_nested_repeated_schema -def test_grant_view_access(capsys, table_id, random_dataset_id): - grant_view_access.grant_view_access(table_id, random_dataset_id) +def test_create_table_nested_repeated_schema(capsys, random_table_id): + + create_table_nested_repeated_schema.create_table_nested_repeated_schema( + random_table_id + ) out, err = capsys.readouterr() - assert "Assign access controls to the dataset successfully." in out - assert "Grant view access successfully" in out + assert "Created table {}".format(random_table_id) in out diff --git a/bigquery/samples/tests/test_load_table_file.py b/bigquery/samples/tests/test_load_table_file.py index c4b683533402..b1451d0bd256 100644 --- a/bigquery/samples/tests/test_load_table_file.py +++ b/bigquery/samples/tests/test_load_table_file.py @@ -13,6 +13,7 @@ # limitations under the License. import os + from google.cloud import bigquery from .. import load_table_file diff --git a/bigquery/samples/tests/test_create_view.py b/bigquery/samples/tests/test_manage_views.py similarity index 53% rename from bigquery/samples/tests/test_create_view.py rename to bigquery/samples/tests/test_manage_views.py index ad4b2c17ecb7..2372bb2a07fb 100644 --- a/bigquery/samples/tests/test_create_view.py +++ b/bigquery/samples/tests/test_manage_views.py @@ -13,9 +13,26 @@ # limitations under the License. from .. import create_view +from .. import update_view +from .. import get_view +from .. import grant_view_access -def test_create_view(capsys, table_id, random_table_id): +def test_manage_views(capsys, table_id, random_table_id, random_dataset_id): create_view.create_view(table_id, random_table_id) out, err = capsys.readouterr() assert "Successfully created view at {}".format(random_table_id) in out + + update_view.update_view(random_table_id) + out, err = capsys.readouterr() + assert "The View query has been updated." 
in out + + get_view.get_view(random_table_id) + out, err = capsys.readouterr() + assert "View at" in out + assert "View Query" in out + + grant_view_access.grant_view_access(table_id, random_dataset_id) + out, err = capsys.readouterr() + assert "Assign access controls to the dataset successfully." in out + assert "Grant view access successfully" in out diff --git a/bigquery/samples/tests/test_get_view.py b/bigquery/samples/tests/test_table_label_samples.py similarity index 52% rename from bigquery/samples/tests/test_get_view.py rename to bigquery/samples/tests/test_table_label_samples.py index d70dc7501294..bc40353a222e 100644 --- a/bigquery/samples/tests/test_get_view.py +++ b/bigquery/samples/tests/test_table_label_samples.py @@ -12,11 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .. import get_view +from .. import label_table +from .. import get_table_labels +from .. import delete_table_labels -def test_get_view(capsys, table_id, random_table_id): - get_view.get_view(table_id, random_table_id) +def test_table_label_samples(capsys, table_id): + + label_table.label_table(table_id) + out, err = capsys.readouterr() + assert "Labels added to {}".format(table_id) in out + + get_table_labels.get_table_labels(table_id) + out, err = capsys.readouterr() + assert "color: green" in out + + dataset = delete_table_labels.delete_table_labels(table_id) out, err = capsys.readouterr() - assert "View at" in out - assert "View Query" in out + assert "Labels deleted from {}".format(table_id) in out + assert dataset.labels.get("color") is None diff --git a/bigquery/samples/tests/test_update_view.py b/bigquery/samples/tests/test_update_view.py deleted file mode 100644 index d29eb464e9be..000000000000 --- a/bigquery/samples/tests/test_update_view.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except 
in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .. import update_view - - -def test_update_view(capsys, table_id, random_table_id): - update_view.update_view(table_id, random_table_id) - out, err = capsys.readouterr() - assert "The View query has been updated." in out diff --git a/bigquery/samples/update_view.py b/bigquery/samples/update_view.py index c9e01608eaee..c164f73368a7 100644 --- a/bigquery/samples/update_view.py +++ b/bigquery/samples/update_view.py @@ -13,7 +13,7 @@ # limitations under the License. -def update_view(load_table_id, table_id): +def update_view(table_id): # [START bigquery_update_view_query] from google.cloud import bigquery @@ -27,29 +27,12 @@ def update_view(load_table_id, table_id): # TODO(developer): Set table_id to the ID of the table to create. # table_id = "your-project.your_dataset.your_table_name" - job_config = bigquery.LoadJobConfig( - schema=[ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ], - skip_leading_rows=1, - ) - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - - load_job = client.load_table_from_uri(uri, load_table_id, job_config=job_config) - load_job.result() - table = client.get_table(load_table_id) - view = bigquery.Table(table_id) - sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"' - view.view_query = sql_template.format( - view.project, table.dataset_id, table.table_id - ) - view = client.create_table(view) # Make an API request. + view = client.get_table(table_id) # Make an API request. 
old_view_query = view.view_query sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "M%"' view.view_query = sql_template.format( - view.project, table.dataset_id, table.table_id + view.project, view.dataset_id, view.table_id ) view = client.update_table(view, ["view_query"]) # API request new_view_query = view.view_query