-
Notifications
You must be signed in to change notification settings - Fork 2.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Inspect gcs sample b 157050959 #3146
Merged
gcf-merge-on-green
merged 3 commits into
GoogleCloudPlatform:master
from
sushicw:inspect-gcs-sample_b-157050959
Jun 11, 2020
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
174 changes: 174 additions & 0 deletions
174
dlp/src/main/java/dlp/snippets/InspectGcsFileWithSampling.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
/* | ||
* Copyright 2019 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package dlp.snippets; | ||
|
||
// [START dlp_inspect_gcs] | ||
|
||
import com.google.api.core.SettableApiFuture; | ||
import com.google.cloud.dlp.v2.DlpServiceClient; | ||
import com.google.cloud.pubsub.v1.AckReplyConsumer; | ||
import com.google.cloud.pubsub.v1.MessageReceiver; | ||
import com.google.cloud.pubsub.v1.Subscriber; | ||
import com.google.privacy.dlp.v2.Action; | ||
import com.google.privacy.dlp.v2.CloudStorageOptions; | ||
import com.google.privacy.dlp.v2.CloudStorageOptions.FileSet; | ||
import com.google.privacy.dlp.v2.CloudStorageOptions.SampleMethod; | ||
import com.google.privacy.dlp.v2.CreateDlpJobRequest; | ||
import com.google.privacy.dlp.v2.DlpJob; | ||
import com.google.privacy.dlp.v2.FileType; | ||
import com.google.privacy.dlp.v2.GetDlpJobRequest; | ||
import com.google.privacy.dlp.v2.InfoType; | ||
import com.google.privacy.dlp.v2.InfoTypeStats; | ||
import com.google.privacy.dlp.v2.InspectConfig; | ||
import com.google.privacy.dlp.v2.InspectDataSourceDetails; | ||
import com.google.privacy.dlp.v2.InspectJobConfig; | ||
import com.google.privacy.dlp.v2.Likelihood; | ||
import com.google.privacy.dlp.v2.LocationName; | ||
import com.google.privacy.dlp.v2.StorageConfig; | ||
import com.google.pubsub.v1.ProjectSubscriptionName; | ||
import com.google.pubsub.v1.PubsubMessage; | ||
import java.io.IOException; | ||
import java.util.concurrent.ExecutionException; | ||
import java.util.concurrent.TimeUnit; | ||
import java.util.concurrent.TimeoutException; | ||
|
||
public class InspectGcsFileWithSampling { | ||
|
||
public static void inspectGcsFileWithSampling() | ||
throws InterruptedException, ExecutionException, IOException { | ||
// TODO(developer): Replace these variables before running the sample. | ||
String projectId = "your-project-id"; | ||
String gcsUri = "gs://" + "your-bucket-name" + "/path/to/your/file.txt"; | ||
String topicId = "your-pubsub-topic-id"; | ||
String subscriptionId = "your-pubsub-subscription-id"; | ||
inspectGcsFileWithSampling(projectId, gcsUri, topicId, subscriptionId); | ||
} | ||
|
||
// Inspects a file in a Google Cloud Storage Bucket. | ||
public static void inspectGcsFileWithSampling( | ||
String projectId, String gcsUri, String topicId, String subscriptionId) | ||
throws ExecutionException, InterruptedException, IOException { | ||
// Initialize client that will be used to send requests. This client only needs to be created | ||
// once, and can be reused for multiple requests. After completing all of your requests, call | ||
// the "close" method on the client to safely clean up any remaining background resources. | ||
try (DlpServiceClient dlp = DlpServiceClient.create()) { | ||
// Specify the GCS file to be inspected and sampling configuration | ||
CloudStorageOptions cloudStorageOptions = | ||
CloudStorageOptions.newBuilder() | ||
.setFileSet(FileSet.newBuilder().setUrl(gcsUri)) | ||
.setBytesLimitPerFile(200) | ||
.addFileTypes(FileType.TEXT_FILE) | ||
.setFilesLimitPercent(90) | ||
.setSampleMethod(SampleMethod.RANDOM_START) | ||
.build(); | ||
|
||
StorageConfig storageConfig = | ||
StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build(); | ||
|
||
// Specify the type of info the inspection will look for. | ||
// See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types | ||
InfoType infoType = InfoType.newBuilder().setName("PERSON_NAME").build(); | ||
|
||
// Specify how the content should be inspected. | ||
InspectConfig inspectConfig = | ||
InspectConfig.newBuilder() | ||
.addInfoTypes(infoType) | ||
.setExcludeInfoTypes(true) | ||
.setIncludeQuote(true) | ||
.setMinLikelihood(Likelihood.POSSIBLE) | ||
.build(); | ||
|
||
// Specify the action that is triggered when the job completes. | ||
String pubSubTopic = String.format("projects/%s/topics/%s", projectId, topicId); | ||
Action.PublishToPubSub publishToPubSub = | ||
Action.PublishToPubSub.newBuilder().setTopic(pubSubTopic).build(); | ||
Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); | ||
|
||
// Configure the long running job we want the service to perform. | ||
InspectJobConfig inspectJobConfig = | ||
InspectJobConfig.newBuilder() | ||
.setStorageConfig(storageConfig) | ||
.setInspectConfig(inspectConfig) | ||
.addActions(action) | ||
.build(); | ||
|
||
// Create the request for the job configured above. | ||
CreateDlpJobRequest createDlpJobRequest = | ||
CreateDlpJobRequest.newBuilder() | ||
.setParent(LocationName.of(projectId, "global").toString()) | ||
.setInspectJob(inspectJobConfig) | ||
.build(); | ||
|
||
// Use the client to send the request. | ||
final DlpJob dlpJob = dlp.createDlpJob(createDlpJobRequest); | ||
System.out.println("Job created: " + dlpJob.getName()); | ||
|
||
// Set up a Pub/Sub subscriber to listen on the job completion status | ||
final SettableApiFuture<Boolean> done = SettableApiFuture.create(); | ||
|
||
ProjectSubscriptionName subscriptionName = | ||
ProjectSubscriptionName.of(projectId, subscriptionId); | ||
|
||
MessageReceiver messageHandler = | ||
(PubsubMessage pubsubMessage, AckReplyConsumer ackReplyConsumer) -> { | ||
handleMessage(dlpJob, done, pubsubMessage, ackReplyConsumer); | ||
}; | ||
Subscriber subscriber = Subscriber.newBuilder(subscriptionName, messageHandler).build(); | ||
subscriber.startAsync(); | ||
|
||
// Wait for the original job to complete | ||
try { | ||
done.get(15, TimeUnit.MINUTES); | ||
} catch (TimeoutException e) { | ||
System.out.println("Job was not completed after 15 minutes."); | ||
return; | ||
} finally { | ||
subscriber.stopAsync(); | ||
subscriber.awaitTerminated(); | ||
} | ||
|
||
// Get the latest state of the job from the service | ||
GetDlpJobRequest request = GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build(); | ||
DlpJob completedJob = dlp.getDlpJob(request); | ||
|
||
// Parse the response and process results. | ||
System.out.println("Job status: " + completedJob.getState()); | ||
InspectDataSourceDetails.Result result = completedJob.getInspectDetails().getResult(); | ||
System.out.println("Findings: "); | ||
for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) { | ||
System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName()); | ||
System.out.println("\tCount: " + infoTypeStat.getCount()); | ||
} | ||
} | ||
} | ||
|
||
// handleMessage injects the job and settableFuture into the message reciever interface | ||
private static void handleMessage( | ||
DlpJob job, | ||
SettableApiFuture<Boolean> done, | ||
PubsubMessage pubsubMessage, | ||
AckReplyConsumer ackReplyConsumer) { | ||
String messageAttribute = pubsubMessage.getAttributesMap().get("DlpJobName"); | ||
if (job.getName().equals(messageAttribute)) { | ||
done.set(true); | ||
ackReplyConsumer.ack(); | ||
} else { | ||
ackReplyConsumer.nack(); | ||
} | ||
} | ||
} | ||
// [END dlp_inspect_gcs] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there something a user should do here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For now I'm keeping this consistent with the other existing samples. Obviously "println and return" is probably not the right answer for most prod systems, but it does keep the sample simple. Hopefully it's easy for users to tune to their own needs.
As a broader effort, it would be great to have a consistent approach to recommend across samples, across APIs, and (ideally) across languages as well.