-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Inspect gcs sample b 157050959 (#3146)
Fixes internal bug b/157050959 - [X] I have followed [Sample Format Guide](https://github.com/GoogleCloudPlatform/java-docs-samples/blob/master/SAMPLE_FORMAT.md) - [X] `pom.xml` parent set to latest `shared-configuration` - [X] Appropriate changes to README are included in PR - [X] API's need to be enabled to test (tell us) (**Nothing new**) - [X] Environment Variables need to be set (ask us to set them) (**Nothing new**) - [X] **Tests** pass: `mvn clean verify` **required** - [X] **Lint** passes: `mvn -P lint checkstyle:check` **required** - [X] **Static Analysis**: `mvn -P lint clean compile pmd:cpd-check spotbugs:check` **advisory only** - [X] Please **merge** this PR for me once it is approved.
- Loading branch information
Showing
3 changed files
with
188 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
174 changes: 174 additions & 0 deletions
174
dlp/src/main/java/dlp/snippets/InspectGcsFileWithSampling.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
/* | ||
* Copyright 2019 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package dlp.snippets; | ||
|
||
// [START dlp_inspect_gcs] | ||
|
||
import com.google.api.core.SettableApiFuture; | ||
import com.google.cloud.dlp.v2.DlpServiceClient; | ||
import com.google.cloud.pubsub.v1.AckReplyConsumer; | ||
import com.google.cloud.pubsub.v1.MessageReceiver; | ||
import com.google.cloud.pubsub.v1.Subscriber; | ||
import com.google.privacy.dlp.v2.Action; | ||
import com.google.privacy.dlp.v2.CloudStorageOptions; | ||
import com.google.privacy.dlp.v2.CloudStorageOptions.FileSet; | ||
import com.google.privacy.dlp.v2.CloudStorageOptions.SampleMethod; | ||
import com.google.privacy.dlp.v2.CreateDlpJobRequest; | ||
import com.google.privacy.dlp.v2.DlpJob; | ||
import com.google.privacy.dlp.v2.FileType; | ||
import com.google.privacy.dlp.v2.GetDlpJobRequest; | ||
import com.google.privacy.dlp.v2.InfoType; | ||
import com.google.privacy.dlp.v2.InfoTypeStats; | ||
import com.google.privacy.dlp.v2.InspectConfig; | ||
import com.google.privacy.dlp.v2.InspectDataSourceDetails; | ||
import com.google.privacy.dlp.v2.InspectJobConfig; | ||
import com.google.privacy.dlp.v2.Likelihood; | ||
import com.google.privacy.dlp.v2.LocationName; | ||
import com.google.privacy.dlp.v2.StorageConfig; | ||
import com.google.pubsub.v1.ProjectSubscriptionName; | ||
import com.google.pubsub.v1.PubsubMessage; | ||
import java.io.IOException; | ||
import java.util.concurrent.ExecutionException; | ||
import java.util.concurrent.TimeUnit; | ||
import java.util.concurrent.TimeoutException; | ||
|
||
public class InspectGcsFileWithSampling { | ||
|
||
public static void inspectGcsFileWithSampling() | ||
throws InterruptedException, ExecutionException, IOException { | ||
// TODO(developer): Replace these variables before running the sample. | ||
String projectId = "your-project-id"; | ||
String gcsUri = "gs://" + "your-bucket-name" + "/path/to/your/file.txt"; | ||
String topicId = "your-pubsub-topic-id"; | ||
String subscriptionId = "your-pubsub-subscription-id"; | ||
inspectGcsFileWithSampling(projectId, gcsUri, topicId, subscriptionId); | ||
} | ||
|
||
// Inspects a file in a Google Cloud Storage Bucket. | ||
public static void inspectGcsFileWithSampling( | ||
String projectId, String gcsUri, String topicId, String subscriptionId) | ||
throws ExecutionException, InterruptedException, IOException { | ||
// Initialize client that will be used to send requests. This client only needs to be created | ||
// once, and can be reused for multiple requests. After completing all of your requests, call | ||
// the "close" method on the client to safely clean up any remaining background resources. | ||
try (DlpServiceClient dlp = DlpServiceClient.create()) { | ||
// Specify the GCS file to be inspected and sampling configuration | ||
CloudStorageOptions cloudStorageOptions = | ||
CloudStorageOptions.newBuilder() | ||
.setFileSet(FileSet.newBuilder().setUrl(gcsUri)) | ||
.setBytesLimitPerFile(200) | ||
.addFileTypes(FileType.TEXT_FILE) | ||
.setFilesLimitPercent(90) | ||
.setSampleMethod(SampleMethod.RANDOM_START) | ||
.build(); | ||
|
||
StorageConfig storageConfig = | ||
StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build(); | ||
|
||
// Specify the type of info the inspection will look for. | ||
// See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types | ||
InfoType infoType = InfoType.newBuilder().setName("PERSON_NAME").build(); | ||
|
||
// Specify how the content should be inspected. | ||
InspectConfig inspectConfig = | ||
InspectConfig.newBuilder() | ||
.addInfoTypes(infoType) | ||
.setExcludeInfoTypes(true) | ||
.setIncludeQuote(true) | ||
.setMinLikelihood(Likelihood.POSSIBLE) | ||
.build(); | ||
|
||
// Specify the action that is triggered when the job completes. | ||
String pubSubTopic = String.format("projects/%s/topics/%s", projectId, topicId); | ||
Action.PublishToPubSub publishToPubSub = | ||
Action.PublishToPubSub.newBuilder().setTopic(pubSubTopic).build(); | ||
Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); | ||
|
||
// Configure the long running job we want the service to perform. | ||
InspectJobConfig inspectJobConfig = | ||
InspectJobConfig.newBuilder() | ||
.setStorageConfig(storageConfig) | ||
.setInspectConfig(inspectConfig) | ||
.addActions(action) | ||
.build(); | ||
|
||
// Create the request for the job configured above. | ||
CreateDlpJobRequest createDlpJobRequest = | ||
CreateDlpJobRequest.newBuilder() | ||
.setParent(LocationName.of(projectId, "global").toString()) | ||
.setInspectJob(inspectJobConfig) | ||
.build(); | ||
|
||
// Use the client to send the request. | ||
final DlpJob dlpJob = dlp.createDlpJob(createDlpJobRequest); | ||
System.out.println("Job created: " + dlpJob.getName()); | ||
|
||
// Set up a Pub/Sub subscriber to listen on the job completion status | ||
final SettableApiFuture<Boolean> done = SettableApiFuture.create(); | ||
|
||
ProjectSubscriptionName subscriptionName = | ||
ProjectSubscriptionName.of(projectId, subscriptionId); | ||
|
||
MessageReceiver messageHandler = | ||
(PubsubMessage pubsubMessage, AckReplyConsumer ackReplyConsumer) -> { | ||
handleMessage(dlpJob, done, pubsubMessage, ackReplyConsumer); | ||
}; | ||
Subscriber subscriber = Subscriber.newBuilder(subscriptionName, messageHandler).build(); | ||
subscriber.startAsync(); | ||
|
||
// Wait for the original job to complete | ||
try { | ||
done.get(15, TimeUnit.MINUTES); | ||
} catch (TimeoutException e) { | ||
System.out.println("Job was not completed after 15 minutes."); | ||
return; | ||
} finally { | ||
subscriber.stopAsync(); | ||
subscriber.awaitTerminated(); | ||
} | ||
|
||
// Get the latest state of the job from the service | ||
GetDlpJobRequest request = GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build(); | ||
DlpJob completedJob = dlp.getDlpJob(request); | ||
|
||
// Parse the response and process results. | ||
System.out.println("Job status: " + completedJob.getState()); | ||
InspectDataSourceDetails.Result result = completedJob.getInspectDetails().getResult(); | ||
System.out.println("Findings: "); | ||
for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) { | ||
System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName()); | ||
System.out.println("\tCount: " + infoTypeStat.getCount()); | ||
} | ||
} | ||
} | ||
|
||
// handleMessage injects the job and settableFuture into the message reciever interface | ||
private static void handleMessage( | ||
DlpJob job, | ||
SettableApiFuture<Boolean> done, | ||
PubsubMessage pubsubMessage, | ||
AckReplyConsumer ackReplyConsumer) { | ||
String messageAttribute = pubsubMessage.getAttributesMap().get("DlpJobName"); | ||
if (job.getName().equals(messageAttribute)) { | ||
done.set(true); | ||
ackReplyConsumer.ack(); | ||
} else { | ||
ackReplyConsumer.nack(); | ||
} | ||
} | ||
} | ||
// [END dlp_inspect_gcs] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters