AutoML Tables #2085

Closed
wants to merge 4 commits
655 changes: 655 additions & 0 deletions tables/automl/automl_tables_dataset.py

Large diffs are not rendered by default.

501 changes: 501 additions & 0 deletions tables/automl/automl_tables_model.py

Large diffs are not rendered by default.

182 changes: 182 additions & 0 deletions tables/automl/automl_tables_predict.py
@@ -0,0 +1,182 @@
#!/usr/bin/env python

# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This application demonstrates how to perform basic operations on prediction
with the Google AutoML Tables API.

For more information, the documentation at
https://cloud.google.com/automl-tables/docs.
"""

import argparse
import os


def predict(project_id,
            compute_region,
            model_id,
            file_path,
            score_threshold=""):
"""Make a prediction."""
# [START automl_tables_predict]
# TODO(developer): Uncomment and set the following variables
# project_id = 'PROJECT_ID_HERE'
# compute_region = 'COMPUTE_REGION_HERE'
# model_id = 'MODEL_ID_HERE'
# file_path = '/local/path/to/file'
# score_threshold = 'value from 0.0 to 0.5'

from google.cloud import automl_v1beta1 as automl
import csv

automl_client = automl.AutoMlClient()

# Get the full path of the model.
model_full_id = automl_client.model_path(
project_id, compute_region, model_id
)

# Create client for prediction service.
prediction_client = automl.PredictionServiceClient()

# params is additional domain-specific parameters.
# score_threshold is used to filter the result
# Initialize params
params = {}
if score_threshold:
params = {"score_threshold": score_threshold}

with open(file_path, "rt") as csv_file:
# Read each row of csv
content = csv.reader(csv_file)
for row in content:
# Create payload
values = []
for column in row:
values.append({'number_value': float(column)})
payload = {
'row': {'values': values}
}

# Query model
response = prediction_client.predict(model_full_id, payload)
print("Prediction results:")
for result in response.payload:
print("Predicted class name: {}".format(result.display_name))
print("Predicted class score: {}".format(result.classification.score))

# [END automl_tables_predict]


def batch_predict(project_id,
                  compute_region,
                  model_id,
                  input_path,
                  output_path):
"""Make a batch of predictions."""
# [START automl_tables_batch_predict]
# TODO(developer): Uncomment and set the following variables
# project_id = 'PROJECT_ID_HERE'
# compute_region = 'COMPUTE_REGION_HERE'
# model_id = 'MODEL_ID_HERE'
# input_path = 'gs://path/to/file.csv' or
# 'bq://project_id.dataset_id.table_id'
# output_path = 'gs://path' or `bq://project_id'


from google.cloud import automl_v1beta1 as automl
import csv

automl_client = automl.AutoMlClient()

# Get the full path of the model.
model_full_id = automl_client.model_path(
project_id, compute_region, model_id
)

# Create client for prediction service.
prediction_client = automl.PredictionServiceClient()

if input_path.startswith('bq'):
input_config = {"bigquery_source": {"input_uri": input_path}}
else:
# Get the multiple Google Cloud Storage URIs.
input_uris = input_path.split(",").strip()
input_config = {"gcs_source": {"input_uris": input_uris}}

if output_path.startswith('bq'):
output_config = {"bigquery_destination": {"output_uri": output_path}}
else:
# Get the multiple Google Cloud Storage URIs.
output_uris = output_path.split(",").strip()
output_config = {"gcs_destination": {"output_uris": output_uris}}

# Query model
response = prediction_client.batch_predict(
model_full_id, input_config, output_config)
print("Making batch prediction... ")
try:
result = response.result()
except:
# Hides Any to BatchPredictResult error.
pass
print("Batch prediction complete.\n{}".format(response.metadata))

# [END automl_tables_batch_predict]


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
subparsers = parser.add_subparsers(dest="command")

predict_parser = subparsers.add_parser("predict", help=predict.__doc__)
predict_parser.add_argument("--model_id")
predict_parser.add_argument("--file_path")
predict_parser.add_argument("--score_threshold", nargs="?", default="")

batch_predict_parser = subparsers.add_parser(
"batch_predict", help=predict.__doc__
)
batch_predict_parser.add_argument("--model_id")
batch_predict_parser.add_argument("--input_path")
batch_predict_parser.add_argument("--output_path")

project_id = os.environ["PROJECT_ID"]
compute_region = os.environ["REGION_NAME"]

args = parser.parse_args()

if args.command == "predict":
predict(
project_id,
compute_region,
args.model_id,
args.file_path,
args.score_threshold,
)

if args.command == "batch_predict":
batch_predict(
project_id,
compute_region,
args.model_id,
args.input_path,
args.output_path,
)
96 changes: 96 additions & 0 deletions tables/automl/notebooks/census_income_prediction/README.md
@@ -0,0 +1,96 @@
AutoML Tables enables your entire team to automatically build and deploy state-of-the-art machine learning models on structured data at massively increased speed and scale.


## Problem Description
The model uses a real dataset, the [Census Income Dataset](https://archive.ics.uci.edu/ml/datasets/Census+Income).


The goal is to predict whether a given individual has an income above or below 50k, given information like the person's age, education level, marital status, occupation, and so on.
This is framed as a binary classification problem: label each individual as having an income above or below 50k.






### Dataset Details

The dataset consists of over 30k rows, where each row corresponds to a different person. For a given row, there are 14 features that the model conditions on to predict the person's income. A few of the features are named above; the exhaustive list can be found both at the dataset link above and in the colab.




## Solution Walkthrough
The solution has been developed using a [Google Colab notebook](https://colab.sandbox.google.com/notebooks/welcome.ipynb#recent=true).




The walkthrough involves the following steps:


### 1. Set up
The first step in this process is to set up the project. Refer to the [AutoML Tables documentation](https://cloud.google.com/AutoML-tables/docs/) and take the following steps:
* Create a Google Cloud Platform (GCP) project
* Enable billing
* Enable the AutoML API
* Enable the AutoML Tables API
* Create a service account, grant required permissions, and download the service account private key.


### 2. Initialize and authenticate


The client library installation is entirely self-explanatory in the colab.


The authentication process is only slightly more complex: run the second code block, entitled "Authenticate using service account key", and then upload the service account key you created in the setup step.


To make sure your colab is authenticated and has access to your project, replace the project_id with your own project ID and run the subsequent code blocks. You should see the lists of your datasets and of any models you previously made in AutoML Tables.
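
If you are running outside the colab, a minimal sketch of the same check with the client library might look like the following; the key file name, project ID, and region are placeholders, not values from the colab:

```python
from google.cloud import automl_v1beta1 as automl

# Authenticate with the service account key downloaded during setup.
client = automl.AutoMlClient.from_service_account_file(
    'my-service-account-key.json')  # placeholder path

# List existing datasets to confirm the client can reach the project.
project_id = 'my-project-id'    # placeholder
compute_region = 'us-central1'  # placeholder
parent = client.location_path(project_id, compute_region)
for dataset in client.list_datasets(parent):
    print(dataset.display_name)
```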


### 3. Import training data


This section has you create a dataset and import the data. You can either import the CSV from a Cloud Storage bucket, or upload the CSV into BigQuery and import it from there.
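
As a rough sketch of the Cloud Storage variant, assuming the `client` and `parent` objects from the authentication sketch above (the bucket path is a placeholder):

```python
# Create an empty Tables dataset, then import the CSV into it.
dataset = client.create_dataset(
    parent,
    {'display_name': 'census_income', 'tables_dataset_metadata': {}})

# import_data returns a long-running operation; result() blocks
# until the import finishes.
input_config = {'gcs_source': {'input_uris': [
    'gs://my-bucket/census_income.csv']}}  # placeholder bucket path
client.import_data(dataset.name, input_config).result()
```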




### 4. Update dataset: assign a label column and enable nullable columns


This section is important, as it is where you specify which column (meaning which feature) you will use as your label. This label feature will then be predicted using all other features in the row.
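
As a sketch of what the colab does here, assuming `client` and `dataset` from the previous sketch, and assuming the label column is named `income` (check the actual column name in your dataset):

```python
# Look up the column spec whose display name matches the label column.
table_spec = list(client.list_table_specs(dataset.name))[0]
column_specs = {c.display_name: c
                for c in client.list_column_specs(table_spec.name)}
label_spec_id = column_specs['income'].name.rsplit('/', 1)[-1]

# Point the dataset's metadata at that column spec ID.
client.update_dataset({
    'name': dataset.name,
    'tables_dataset_metadata': {'target_column_spec_id': label_spec_id},
})
```

The colab similarly updates individual column specs at this step (via `client.update_column_spec`) to mark columns as nullable.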


### 5. Create a model


This section is where you train your model. You can specify how long you want your model to train for.
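
A minimal sketch of the training call, assuming `client`, `parent`, and `dataset` from the previous steps; the display name and the one-node-hour budget are illustrative choices:

```python
# The model references its dataset by the trailing ID of the name.
dataset_id = dataset.name.rsplit('/', 1)[-1]
create_op = client.create_model(parent, {
    'display_name': 'census_income_model',  # illustrative name
    'dataset_id': dataset_id,
    'tables_model_metadata': {
        'train_budget_milli_node_hours': 1000},  # i.e. one node hour
})
model_name = create_op.result().name  # blocks until training finishes
```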


### 6. Make a prediction


This section gives you the ability to do a single online prediction. You can toggle exactly which values you want for all of the numeric features, and choose from the drop down windows which values you want for the categorical features.


The model takes a while to deploy online, and the SDK currently provides no feedback mechanism, so you will need to wait until the model finishes deploying before running the online prediction.
When the deployment code ```response = client.deploy_model(model_name)``` finishes, you will be able to see this on the [UI](https://console.cloud.google.com/automl-tables?_ga=2.255483016.-1079099924.1550856636).


To see when it finishes, click the UI link above, navigate to the dataset you just uploaded, and go to the predict tab. You should see "online prediction" text near the top; click on it to open your online prediction interface. You should see "model deployed" on the far right of the screen if the model is deployed, or a "deploying model" message if it is still deploying.


Once the model finishes deployment, go ahead and run the ```prediction_client.predict(model_name, payload)``` line.


Note: If the model has not finished deployment, the prediction will NOT work.
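
Putting the pieces of this section together, a sketch of the deploy-then-predict flow, assuming `client` and `model_name` from step 5; the feature values are illustrative and must follow your table's column order:

```python
from google.cloud import automl_v1beta1 as automl

response = client.deploy_model(model_name)
# Wait for deployment to finish (watch the UI, as described above)
# before running the online prediction.

prediction_client = automl.PredictionServiceClient()
payload = {'row': {'values': [
    {'number_value': 39.0},          # e.g. age
    {'string_value': 'Bachelors'},   # e.g. education
    # ...one value per remaining feature column...
]}}
prediction = prediction_client.predict(model_name, payload)
for result in prediction.payload:
    print(result.display_name, result.classification.score)
```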


### 7. Batch Prediction


A validation CSV file is provided with a few rows of data that were not used in training or testing, so you can run a batch prediction on them. The CSV is linked in the text of the colab, as well as [here](https://storage.googleapis.com/automl-tables-v1beta1/census_income_batch_prediction_input.csv).
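
A sketch of the batch prediction call, assuming `model_name` and `prediction_client` from the previous section; the output bucket path is a placeholder:

```python
# gs:// form of the validation CSV linked above (copy it into your
# own bucket if the service cannot read it directly).
input_config = {'gcs_source': {'input_uris': [
    'gs://automl-tables-v1beta1/census_income_batch_prediction_input.csv']}}
output_config = {'gcs_destination': {
    'output_uri_prefix': 'gs://my-bucket/batch_results'}}  # placeholder

batch_op = prediction_client.batch_predict(
    model_name, input_config, output_config)
try:
    batch_op.result()  # wait for the batch job to finish
except Exception:
    pass  # the sample above swallows a known unpacking error here
print(batch_op.metadata)  # results land under the output prefix
```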