Skip to content

Commit

Permalink
Inspect gcs sample b 157050959 (#3146)
Browse files Browse the repository at this point in the history
Fixes internal bug b/157050959

- [X] I have followed [Sample Format Guide](https://github.com/GoogleCloudPlatform/java-docs-samples/blob/master/SAMPLE_FORMAT.md)
- [X] `pom.xml` parent set to latest `shared-configuration`
- [X] Appropriate changes to README are included in PR
- [X] API's need to be enabled to test (tell us) (**Nothing new**)
- [X] Environment Variables need to be set (ask us to set them) (**Nothing new**)
- [X] **Tests** pass:   `mvn clean verify` **required**
- [X] **Lint**  passes: `mvn -P lint checkstyle:check` **required**
- [X] **Static Analysis**:  `mvn -P lint clean compile pmd:cpd-check spotbugs:check` **advisory only**
- [X] Please **merge** this PR for me once it is approved.
  • Loading branch information
sushicw authored Jun 11, 2020
1 parent 2815d73 commit fe599dc
Show file tree
Hide file tree
Showing 3 changed files with 188 additions and 2 deletions.
6 changes: 4 additions & 2 deletions dlp/src/main/java/dlp/snippets/InspectGcsFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,11 @@ public static void inspectGcsFile(
// the "close" method on the client to safely clean up any remaining background resources.
try (DlpServiceClient dlp = DlpServiceClient.create()) {
// Specify the GCS file to be inspected.
FileSet fileSet = FileSet.newBuilder().setUrl(gcsUri).build();
CloudStorageOptions cloudStorageOptions =
CloudStorageOptions.newBuilder().setFileSet(fileSet).build();
CloudStorageOptions.newBuilder()
.setFileSet(FileSet.newBuilder().setUrl(gcsUri))
.build();

StorageConfig storageConfig =
StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build();

Expand Down
174 changes: 174 additions & 0 deletions dlp/src/main/java/dlp/snippets/InspectGcsFileWithSampling.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/*
* Copyright 2019 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package dlp.snippets;

// [START dlp_inspect_gcs]

import com.google.api.core.SettableApiFuture;
import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.cloud.pubsub.v1.AckReplyConsumer;
import com.google.cloud.pubsub.v1.MessageReceiver;
import com.google.cloud.pubsub.v1.Subscriber;
import com.google.privacy.dlp.v2.Action;
import com.google.privacy.dlp.v2.CloudStorageOptions;
import com.google.privacy.dlp.v2.CloudStorageOptions.FileSet;
import com.google.privacy.dlp.v2.CloudStorageOptions.SampleMethod;
import com.google.privacy.dlp.v2.CreateDlpJobRequest;
import com.google.privacy.dlp.v2.DlpJob;
import com.google.privacy.dlp.v2.FileType;
import com.google.privacy.dlp.v2.GetDlpJobRequest;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InfoTypeStats;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.InspectDataSourceDetails;
import com.google.privacy.dlp.v2.InspectJobConfig;
import com.google.privacy.dlp.v2.Likelihood;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.StorageConfig;
import com.google.pubsub.v1.ProjectSubscriptionName;
import com.google.pubsub.v1.PubsubMessage;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class InspectGcsFileWithSampling {

public static void inspectGcsFileWithSampling()
throws InterruptedException, ExecutionException, IOException {
// TODO(developer): Replace these variables before running the sample.
String projectId = "your-project-id";
String gcsUri = "gs://" + "your-bucket-name" + "/path/to/your/file.txt";
String topicId = "your-pubsub-topic-id";
String subscriptionId = "your-pubsub-subscription-id";
inspectGcsFileWithSampling(projectId, gcsUri, topicId, subscriptionId);
}

// Inspects a file in a Google Cloud Storage Bucket.
public static void inspectGcsFileWithSampling(
String projectId, String gcsUri, String topicId, String subscriptionId)
throws ExecutionException, InterruptedException, IOException {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
// the "close" method on the client to safely clean up any remaining background resources.
try (DlpServiceClient dlp = DlpServiceClient.create()) {
// Specify the GCS file to be inspected and sampling configuration
CloudStorageOptions cloudStorageOptions =
CloudStorageOptions.newBuilder()
.setFileSet(FileSet.newBuilder().setUrl(gcsUri))
.setBytesLimitPerFile(200)
.addFileTypes(FileType.TEXT_FILE)
.setFilesLimitPercent(90)
.setSampleMethod(SampleMethod.RANDOM_START)
.build();

StorageConfig storageConfig =
StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build();

// Specify the type of info the inspection will look for.
// See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
InfoType infoType = InfoType.newBuilder().setName("PERSON_NAME").build();

// Specify how the content should be inspected.
InspectConfig inspectConfig =
InspectConfig.newBuilder()
.addInfoTypes(infoType)
.setExcludeInfoTypes(true)
.setIncludeQuote(true)
.setMinLikelihood(Likelihood.POSSIBLE)
.build();

// Specify the action that is triggered when the job completes.
String pubSubTopic = String.format("projects/%s/topics/%s", projectId, topicId);
Action.PublishToPubSub publishToPubSub =
Action.PublishToPubSub.newBuilder().setTopic(pubSubTopic).build();
Action action = Action.newBuilder().setPubSub(publishToPubSub).build();

// Configure the long running job we want the service to perform.
InspectJobConfig inspectJobConfig =
InspectJobConfig.newBuilder()
.setStorageConfig(storageConfig)
.setInspectConfig(inspectConfig)
.addActions(action)
.build();

// Create the request for the job configured above.
CreateDlpJobRequest createDlpJobRequest =
CreateDlpJobRequest.newBuilder()
.setParent(LocationName.of(projectId, "global").toString())
.setInspectJob(inspectJobConfig)
.build();

// Use the client to send the request.
final DlpJob dlpJob = dlp.createDlpJob(createDlpJobRequest);
System.out.println("Job created: " + dlpJob.getName());

// Set up a Pub/Sub subscriber to listen on the job completion status
final SettableApiFuture<Boolean> done = SettableApiFuture.create();

ProjectSubscriptionName subscriptionName =
ProjectSubscriptionName.of(projectId, subscriptionId);

MessageReceiver messageHandler =
(PubsubMessage pubsubMessage, AckReplyConsumer ackReplyConsumer) -> {
handleMessage(dlpJob, done, pubsubMessage, ackReplyConsumer);
};
Subscriber subscriber = Subscriber.newBuilder(subscriptionName, messageHandler).build();
subscriber.startAsync();

// Wait for the original job to complete
try {
done.get(15, TimeUnit.MINUTES);
} catch (TimeoutException e) {
System.out.println("Job was not completed after 15 minutes.");
return;
} finally {
subscriber.stopAsync();
subscriber.awaitTerminated();
}

// Get the latest state of the job from the service
GetDlpJobRequest request = GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build();
DlpJob completedJob = dlp.getDlpJob(request);

// Parse the response and process results.
System.out.println("Job status: " + completedJob.getState());
InspectDataSourceDetails.Result result = completedJob.getInspectDetails().getResult();
System.out.println("Findings: ");
for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) {
System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName());
System.out.println("\tCount: " + infoTypeStat.getCount());
}
}
}

// handleMessage injects the job and settableFuture into the message reciever interface
private static void handleMessage(
DlpJob job,
SettableApiFuture<Boolean> done,
PubsubMessage pubsubMessage,
AckReplyConsumer ackReplyConsumer) {
String messageAttribute = pubsubMessage.getAttributesMap().get("DlpJobName");
if (job.getName().equals(messageAttribute)) {
done.set(true);
ackReplyConsumer.ack();
} else {
ackReplyConsumer.nack();
}
}
}
// [END dlp_inspect_gcs]
10 changes: 10 additions & 0 deletions dlp/src/test/java/dlp/snippets/InspectTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,16 @@ public void testInspectGcsFile() throws Exception {
assertThat(output, containsString("Job status: DONE"));
}

@Test
public void testInspectGcsFileWithSampling() throws Exception {
InspectGcsFileWithSampling
.inspectGcsFileWithSampling(PROJECT_ID, GCS_PATH, topicName.getTopic(),
subscriptionName.getSubscription());

String output = bout.toString();
assertThat(output, containsString("Job status: DONE"));
}

@Test
public void testInspectDatastoreEntity() throws Exception {
InspectDatastoreEntity
Expand Down

0 comments on commit fe599dc

Please sign in to comment.