-
Notifications
You must be signed in to change notification settings - Fork 2.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add new DLP samples (Bigquery, DeID, Risk Analysis) #841
Changes from 5 commits
7f531ae
eca2c8b
3c8a224
2a6148b
28ae829
7e439bc
18038cb
f7ba95e
bd6eb95
b5ebdab
2c4bc7d
f34ddd4
30f0028
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,237 @@ | ||
/** | ||
* Copyright 2017 Google Inc. | ||
* <p> | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* <p> | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* <p> | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.example.dlp; | ||
|
||
import com.google.cloud.dlp.v2beta1.DlpServiceClient; | ||
import com.google.common.io.BaseEncoding; | ||
import com.google.privacy.dlp.v2beta1.*; | ||
import com.google.privacy.dlp.v2beta1.InfoTypeTransformations.InfoTypeTransformation; | ||
import com.google.privacy.dlp.v2beta1.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet; | ||
import com.google.protobuf.ByteString; | ||
import org.apache.commons.cli.*; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please avoid .* imports. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
|
||
public class DeId { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Given this short name, I recommend renaming to DeIdentification. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
|
||
/** | ||
* De-identifies a string by masking matched sensitive characters, then prints the value of | ||
* every item returned by the API to standard output. | ||
* | ||
* <p>Any exception raised while creating the client or calling the API is caught and reported | ||
* as a single line on standard output; nothing is rethrown. | ||
* | ||
* @param string the text to de-identify | ||
* @param maskingCharacter the character used to mask matching sensitive data | ||
* @param numberToMask the maximum number of sensitive characters to mask in a match | ||
*/ | ||
private static void deidentifyWithMask(String string, Character maskingCharacter, int numberToMask) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. deidentify => deIdentify There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
// [START dlp_deidentify_mask] | ||
// instantiate a client | ||
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { | ||
|
||
// The string to deidentify | ||
// string = "My SSN is 372819127"; | ||
|
||
// (Optional) The maximum number of sensitive characters to mask in a match | ||
// If omitted from the request or set to 0, the API will mask any matching characters | ||
// numberToMask = 5; | ||
|
||
// (Optional) The character to mask matching sensitive data with | ||
// maskingCharacter = 'x'; | ||
|
||
// Wrap the input string as a plain-text content item | ||
ContentItem contentItem = | ||
ContentItem.newBuilder() | ||
.setType("text/plain") | ||
.setValue(string) | ||
.build(); | ||
|
||
// Masking settings: the replacement character and how many characters to mask | ||
CharacterMaskConfig characterMaskConfig = | ||
CharacterMaskConfig.newBuilder() | ||
.setMaskingCharacter(maskingCharacter.toString()) | ||
.setNumberToMask(numberToMask) | ||
.build(); | ||
|
||
// The primitive transformation that carries the masking settings | ||
PrimitiveTransformation primitiveTransformation = | ||
PrimitiveTransformation.newBuilder() | ||
.setCharacterMaskConfig(characterMaskConfig) | ||
.build(); | ||
|
||
// A single info-type transformation using that primitive; no info types are listed on it here | ||
InfoTypeTransformation infoTypeTransformationObject = | ||
InfoTypeTransformation.newBuilder() | ||
.setPrimitiveTransformation(primitiveTransformation) | ||
.build(); | ||
|
||
// Container holding the one info-type transformation built above | ||
InfoTypeTransformations infoTypeTransformationArray = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given all these objects that get created just to create a request, it would be to provide a comment above each to state what it is. It is unfortunate that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had similar comments with the Node client libraries, to be honest. I'll follow up with the API/product team(s) about this. (TODO, so I can CTRL-F for this comment.) |
||
InfoTypeTransformations.newBuilder() | ||
.addTransformations(infoTypeTransformationObject) | ||
.build(); | ||
|
||
// De-identification configuration built from the transformations container | ||
DeidentifyConfig deidentifyConfig = | ||
DeidentifyConfig.newBuilder() | ||
.setInfoTypeTransformations(infoTypeTransformationArray) | ||
.build(); | ||
|
||
// The request pairs the configuration with the content item to process | ||
DeidentifyContentRequest request = | ||
DeidentifyContentRequest.newBuilder() | ||
.setDeidentifyConfig(deidentifyConfig) | ||
.addItems(contentItem) | ||
.build(); | ||
|
||
// Call the API with the assembled request | ||
DeidentifyContentResponse response = dlpServiceClient.deidentifyContent(request); | ||
|
||
// Print the value of every returned item, one per line | ||
// NOTE(review): for the sample input above with 'x' and numberToMask = 5 this presumably | ||
// prints something like "My SSN is xxxxx9127" -- confirm against a real API response | ||
for (ContentItem item : response.getItemsList()) { | ||
System.out.println(item.getValue()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and example would be useful as a comment on what to expect. |
||
} | ||
} catch (Exception e) { | ||
// Failures are reported by message only; the exception is not propagated to the caller | ||
System.out.println("Error in deidentifyWithMask: " + e.getMessage()); | ||
} | ||
// [END dlp_deidentify_mask] | ||
} | ||
|
||
/** | ||
* De-identifies a string using the FFX format-preserving-encryption transformation with a | ||
* Cloud KMS wrapped key, then prints the value of every item returned by the API to standard | ||
* output. | ||
* | ||
* <p>Any exception raised while building the request or calling the API is caught and reported | ||
* as a single line on standard output; nothing is rethrown. | ||
* | ||
* @param string the text to de-identify | ||
* @param alphabet the set of characters to replace sensitive ones with | ||
* @param keyName the name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key | ||
* @param wrappedKey the encrypted ('wrapped') AES-256 key; it is base64-decoded before use | ||
*/ | ||
private static void deidentifyWithFpe( | ||
String string, CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet alphabet, String keyName, String wrappedKey) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. given the import, is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done - removed the prefix. |
||
// [START dlp_deidentify_fpe] | ||
// instantiate a client | ||
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { | ||
|
||
// The string to deidentify | ||
// string = "My SSN is 372819127"; | ||
|
||
// The set of characters to replace sensitive ones with | ||
// For more information, see https://cloud.google.com/dlp/docs/reference/rest/v2beta1/content/deidentify#FfxCommonNativeAlphabet | ||
// alphabet = FfxCommonNativeAlphabet.ALPHA_NUMERIC; | ||
|
||
// The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key | ||
// keyName = "projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME"; | ||
|
||
// The encrypted ('wrapped') AES-256 key to use, as a base64 string | ||
// This key should be encrypted using the Cloud KMS key specified above | ||
// wrappedKey = "YOUR_ENCRYPTED_AES_256_KEY"; | ||
|
||
// Wrap the input string as a plain-text content item | ||
ContentItem contentItem = | ||
ContentItem.newBuilder() | ||
.setType("text/plain") | ||
.setValue(string) | ||
.build(); | ||
|
||
// The wrapped key (base64-decoded to raw bytes) together with the KMS key name | ||
KmsWrappedCryptoKey kmsWrappedCryptoKey = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment as earlier, better comments around these objects and ideally some of these can get cleaned up on the surface. This looks very cumbersome to the user. |
||
KmsWrappedCryptoKey.newBuilder() | ||
.setWrappedKey(ByteString.copyFrom(BaseEncoding.base64().decode(wrappedKey))) | ||
.setCryptoKeyName(keyName) | ||
.build(); | ||
|
||
// Crypto key holder that selects the KMS-wrapped key variant | ||
CryptoKey cryptoKey = | ||
CryptoKey.newBuilder() | ||
.setKmsWrapped(kmsWrappedCryptoKey) | ||
.build(); | ||
|
||
// FFX FPE settings: which key to use and which alphabet to draw replacements from | ||
CryptoReplaceFfxFpeConfig cryptoReplaceFfxFpeConfig = | ||
CryptoReplaceFfxFpeConfig.newBuilder() | ||
.setCryptoKey(cryptoKey) | ||
.setCommonAlphabet(alphabet) | ||
.build(); | ||
|
||
// The primitive transformation that carries the FPE settings | ||
PrimitiveTransformation primitiveTransformation = | ||
PrimitiveTransformation.newBuilder() | ||
.setCryptoReplaceFfxFpeConfig(cryptoReplaceFfxFpeConfig) | ||
.build(); | ||
|
||
// A single info-type transformation using that primitive; no info types are listed on it here | ||
InfoTypeTransformation infoTypeTransformationObject = | ||
InfoTypeTransformation.newBuilder() | ||
.setPrimitiveTransformation(primitiveTransformation) | ||
.build(); | ||
|
||
// Container holding the one info-type transformation built above | ||
InfoTypeTransformations infoTypeTransformationArray = | ||
InfoTypeTransformations.newBuilder() | ||
.addTransformations(infoTypeTransformationObject) | ||
.build(); | ||
|
||
// De-identification configuration built from the transformations container | ||
DeidentifyConfig deidentifyConfig = | ||
DeidentifyConfig.newBuilder() | ||
.setInfoTypeTransformations(infoTypeTransformationArray) | ||
.build(); | ||
|
||
// The request pairs the configuration with the content item to process | ||
DeidentifyContentRequest request = | ||
DeidentifyContentRequest.newBuilder() | ||
.setDeidentifyConfig(deidentifyConfig) | ||
.addItems(contentItem) | ||
.build(); | ||
|
||
// Call the API with the assembled request | ||
DeidentifyContentResponse response = dlpServiceClient.deidentifyContent(request); | ||
|
||
// Print the value of every returned item, one per line | ||
for (ContentItem item : response.getItemsList()) { | ||
System.out.println(item.getValue()); | ||
} | ||
} catch (Exception e) { | ||
// Failures are reported by message only; the exception is not propagated to the caller | ||
System.out.println("Error in deidentifyWithFpe: " + e.getMessage()); | ||
} | ||
// [END dlp_deidentify_fpe] | ||
} | ||
|
||
/** | ||
* Command line application to de-identify data using the Data Loss Prevention API. | ||
* Supported data format: strings | ||
* | ||
* <p>One of two modes must be selected: {@code -m STRING} for character masking (optionally | ||
* with {@code -maskingCharacter} and {@code -numberToMask}), or {@code -f STRING} for FFX FPE | ||
* (with {@code -wrappedKey} and {@code -keyName}, optionally {@code -commonAlphabet}). | ||
* | ||
* <p>Unparseable command lines and invalid or missing option values print a message followed | ||
* by the usage help, then exit with status 1. | ||
* | ||
* @param args the command line arguments | ||
* @throws Exception kept in the signature for compatibility with existing callers | ||
*/ | ||
public static void main(String[] args) throws Exception { | ||
|
||
// The two de-identification modes share one required option group | ||
OptionGroup optionsGroup = new OptionGroup(); | ||
optionsGroup.setRequired(true); | ||
|
||
Option deidentifyMaskingOption = new Option("m", "mask", true, "deid with character masking"); | ||
optionsGroup.addOption(deidentifyMaskingOption); | ||
|
||
Option deidentifyFpeOption = new Option("f", "fpe", true, "deid with FFX FPE"); | ||
optionsGroup.addOption(deidentifyFpeOption); | ||
|
||
Options commandLineOptions = new Options(); | ||
commandLineOptions.addOptionGroup(optionsGroup); | ||
|
||
// Mode-specific settings are all declared optional; the ones a mode needs are checked below | ||
Option maskingCharacterOption = Option.builder("maskingCharacter").hasArg(true).required(false).build(); | ||
commandLineOptions.addOption(maskingCharacterOption); | ||
|
||
Option numberToMaskOption = Option.builder("numberToMask").hasArg(true).required(false).build(); | ||
commandLineOptions.addOption(numberToMaskOption); | ||
|
||
Option alphabetOption = Option.builder("commonAlphabet").hasArg(true).required(false).build(); | ||
commandLineOptions.addOption(alphabetOption); | ||
|
||
Option wrappedKeyOption = Option.builder("wrappedKey").hasArg(true).required(false).build(); | ||
commandLineOptions.addOption(wrappedKeyOption); | ||
|
||
Option keyNameOption = Option.builder("keyName").hasArg(true).required(false).build(); | ||
commandLineOptions.addOption(keyNameOption); | ||
|
||
CommandLineParser parser = new DefaultParser(); | ||
HelpFormatter formatter = new HelpFormatter(); | ||
CommandLine cmd; | ||
|
||
try { | ||
cmd = parser.parse(commandLineOptions, args); | ||
} catch (ParseException e) { | ||
System.out.println(e.getMessage()); | ||
formatter.printHelp(DeId.class.getName(), commandLineOptions); | ||
System.exit(1); | ||
return; | ||
} | ||
|
||
try { | ||
if (cmd.hasOption("m")) { | ||
// deid with character masking | ||
// Integer.parseInt throws NumberFormatException (an IllegalArgumentException) on bad input | ||
int numberToMask = Integer.parseInt(cmd.getOptionValue(numberToMaskOption.getOpt(), "0")); | ||
String maskingCharacterValue = cmd.getOptionValue(maskingCharacterOption.getOpt(), "*"); | ||
// An empty value would otherwise fail in charAt(0) with StringIndexOutOfBoundsException | ||
if (maskingCharacterValue.isEmpty()) { | ||
throw new IllegalArgumentException("maskingCharacter must not be empty"); | ||
} | ||
char maskingCharacter = maskingCharacterValue.charAt(0); | ||
String val = cmd.getOptionValue(deidentifyMaskingOption.getOpt()); | ||
deidentifyWithMask(val, maskingCharacter, numberToMask); | ||
} else if (cmd.hasOption("f")) { | ||
// deid with FPE | ||
String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt()); | ||
String keyName = cmd.getOptionValue(keyNameOption.getOpt()); | ||
// Both values are needed to build the KMS-wrapped key; fail early with a clear message | ||
if (wrappedKey == null || keyName == null) { | ||
throw new IllegalArgumentException("FPE de-identification requires both wrappedKey and keyName"); | ||
} | ||
String val = cmd.getOptionValue(deidentifyFpeOption.getOpt()); | ||
// Enum valueOf throws IllegalArgumentException for a name that is not a constant | ||
FfxCommonNativeAlphabet alphabet = | ||
FfxCommonNativeAlphabet.valueOf( | ||
cmd.getOptionValue( | ||
alphabetOption.getOpt(), FfxCommonNativeAlphabet.ALPHA_NUMERIC.name())); | ||
deidentifyWithFpe(val, alphabet, keyName, wrappedKey); | ||
} | ||
} catch (IllegalArgumentException e) { | ||
// Bad option values get the same treatment as an unparseable command line | ||
System.out.println("Invalid argument: " + e.getMessage()); | ||
formatter.printHelp(DeId.class.getName(), commandLineOptions); | ||
System.exit(1); | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,24 +20,8 @@ | |
import com.google.cloud.ServiceOptions; | ||
import com.google.cloud.dlp.v2beta1.DlpServiceClient; | ||
import com.google.longrunning.Operation; | ||
import com.google.privacy.dlp.v2beta1.CloudStorageOptions; | ||
import com.google.privacy.dlp.v2beta1.*; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please remove .* import. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
import com.google.privacy.dlp.v2beta1.CloudStorageOptions.FileSet; | ||
import com.google.privacy.dlp.v2beta1.ContentItem; | ||
import com.google.privacy.dlp.v2beta1.DatastoreOptions; | ||
import com.google.privacy.dlp.v2beta1.Finding; | ||
import com.google.privacy.dlp.v2beta1.InfoType; | ||
import com.google.privacy.dlp.v2beta1.InspectConfig; | ||
import com.google.privacy.dlp.v2beta1.InspectContentRequest; | ||
import com.google.privacy.dlp.v2beta1.InspectContentResponse; | ||
import com.google.privacy.dlp.v2beta1.InspectOperationMetadata; | ||
import com.google.privacy.dlp.v2beta1.InspectOperationResult; | ||
import com.google.privacy.dlp.v2beta1.InspectResult; | ||
import com.google.privacy.dlp.v2beta1.KindExpression; | ||
import com.google.privacy.dlp.v2beta1.Likelihood; | ||
import com.google.privacy.dlp.v2beta1.OutputStorageConfig; | ||
import com.google.privacy.dlp.v2beta1.PartitionId; | ||
import com.google.privacy.dlp.v2beta1.ResultName; | ||
import com.google.privacy.dlp.v2beta1.StorageConfig; | ||
import com.google.protobuf.ByteString; | ||
import java.net.URLConnection; | ||
import java.nio.file.Files; | ||
|
@@ -332,9 +316,79 @@ private static void inspectDatastore( | |
// [END dlp_inspect_datastore] | ||
} | ||
|
||
private static void inspectBigquery( | ||
String projectId, | ||
String datasetId, | ||
String tableId, | ||
Likelihood minLikelihood, | ||
List<InfoType> infoTypes) { | ||
// [START dlp_inspect_bigquery] | ||
// Instantiates a client | ||
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { | ||
|
||
// (Optional) The project ID to run the API call under | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You might wish to mention the API's if you are going to mention this much stuff. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps in a more JavaDoc way and move the tag earlier. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
// projectId = my-project-id | ||
|
||
// The ID of the dataset to inspect, e.g. 'my_dataset' | ||
// datasetId = "my_dataset"; | ||
|
||
// The ID of the table to inspect, e.g. 'my_table' | ||
// tableId = "my_table"; | ||
|
||
// The minimum likelihood required before returning a match | ||
// minLikelihood = LIKELIHOOD_UNSPECIFIED; | ||
|
||
// The infoTypes of information to match | ||
// infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; | ||
|
||
// Reference to the BigQuery table | ||
BigQueryTable tableReference = | ||
BigQueryTable.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build(); | ||
BigQueryOptions bigQueryOptions = | ||
BigQueryOptions.newBuilder().setTableReference(tableReference).build(); | ||
|
||
// Construct BigQuery configuration to be inspected | ||
StorageConfig storageConfig = | ||
StorageConfig.newBuilder().setBigQueryOptions(bigQueryOptions).build(); | ||
|
||
InspectConfig inspectConfig = | ||
InspectConfig.newBuilder() | ||
.addAllInfoTypes(infoTypes) | ||
.setMinLikelihood(minLikelihood) | ||
.build(); | ||
|
||
// optionally provide an output configuration to store results, default : none | ||
OutputStorageConfig outputConfig = OutputStorageConfig.getDefaultInstance(); | ||
|
||
// asynchronously submit an inspect operation | ||
OperationFuture<InspectOperationResult, InspectOperationMetadata, Operation> responseFuture = | ||
dlpServiceClient.createInspectOperationAsync(inspectConfig, storageConfig, outputConfig); | ||
|
||
// ... | ||
// block on response, returning job id of the operation | ||
InspectOperationResult inspectOperationResult = responseFuture.get(); | ||
ResultName resultName = inspectOperationResult.getNameAsResultName(); | ||
InspectResult inspectResult = dlpServiceClient.listInspectFindings(resultName).getResult(); | ||
|
||
if (inspectResult.getFindingsCount() > 0) { | ||
System.out.println("Findings: "); | ||
for (Finding finding : inspectResult.getFindingsList()) { | ||
System.out.print("\tInfo type: " + finding.getInfoType().getName()); | ||
System.out.println("\tLikelihood: " + finding.getLikelihood()); | ||
} | ||
} else { | ||
System.out.println("No findings."); | ||
} | ||
} catch (Exception e) { | ||
e.printStackTrace(); | ||
System.out.println("Error in inspectBigguery: " + e.getMessage()); | ||
} | ||
// [END dlp_inspect_bigquery] | ||
} | ||
|
||
/** | ||
* Command line application to inspect data using the Data Loss Prevention API. | ||
* Supported data formats : string, file, text files on GCS and Datastore entities | ||
* Supported data formats: strings, files, text files on GCS, BigQuery tables, and Datastore entities | ||
*/ | ||
public static void main(String[] args) throws Exception { | ||
|
||
|
@@ -352,6 +406,9 @@ public static void main(String[] args) throws Exception { | |
Option datastoreOption = new Option("ds", "Google Datastore", false, "inspect Datastore kind"); | ||
optionsGroup.addOption(datastoreOption); | ||
|
||
Option bigqueryOption = new Option("bq", "Google BigQuery", false, "inspect BigQuery table"); | ||
optionsGroup.addOption(bigqueryOption); | ||
|
||
Options commandLineOptions = new Options(); | ||
commandLineOptions.addOptionGroup(optionsGroup); | ||
|
||
|
@@ -377,9 +434,15 @@ public static void main(String[] args) throws Exception { | |
Option gcsFileNameOption = Option.builder("fileName").hasArg(true).required(false).build(); | ||
commandLineOptions.addOption(gcsFileNameOption); | ||
|
||
Option datastoreProjectIdOption = | ||
Option datasetIdOption = Option.builder("datasetId").hasArg(true).required(false).build(); | ||
commandLineOptions.addOption(datasetIdOption); | ||
|
||
Option tableIdOption = Option.builder("tableId").hasArg(true).required(false).build(); | ||
commandLineOptions.addOption(tableIdOption); | ||
|
||
Option projectIdOption = | ||
Option.builder("projectId").hasArg(true).required(false).build(); | ||
commandLineOptions.addOption(datastoreProjectIdOption); | ||
commandLineOptions.addOption(projectIdOption); | ||
|
||
Option datastoreNamespaceOption = | ||
Option.builder("namespace").hasArg(true).required(false).build(); | ||
|
@@ -436,8 +499,16 @@ public static void main(String[] args) throws Exception { | |
// use default project id when project id is not specified | ||
String projectId = | ||
cmd.getOptionValue( | ||
datastoreProjectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); | ||
projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); | ||
inspectDatastore(projectId, namespaceId, kind, minLikelihood, infoTypesList); | ||
} else if (cmd.hasOption("bq")) { | ||
String datasetId = cmd.getOptionValue(datasetIdOption.getOpt()); | ||
String tableId = cmd.getOptionValue(tableIdOption.getOpt()); | ||
// use default project id when project id is not specified | ||
String projectId = | ||
cmd.getOptionValue( | ||
projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); | ||
inspectBigquery(projectId, datasetId, tableId, minLikelihood, infoTypesList); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please avoid .* imports
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.