Skip to content

Commit

Permalink
feat(privacy report): pass flag option for subject mapping override (#478)
Browse files Browse the repository at this point in the history

* feat: pass config flag for subject mapping override

* chore: update scan flag docs

* fix: update flag snapshots
  • Loading branch information
elsapet authored Feb 2, 2023
1 parent b62642f commit 1fea8d8
Show file tree
Hide file tree
Showing 10 changed files with 45 additions and 7 deletions.
3 changes: 3 additions & 0 deletions docs/_data/curio_scan.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ options:
- name: context
usage: |
Expand context of schema classification e.g., --context=health, to include data types particular to health
- name: data-subject-mapping
usage: |
Override default data subject mapping by providing a path to a custom mapping JSON file
- name: debug
default_value: "false"
usage: Enable debug logs
Expand Down
1 change: 1 addition & 0 deletions integration/flags/.snapshots/TestInitCommand
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ rule:
skip-rule: []
scan:
context: ""
data_subject_mapping: ""
debug: false
disable-domain-resolution: true
domain-resolution-timeout: 3s
Expand Down
1 change: 1 addition & 0 deletions integration/flags/.snapshots/TestMetadataFlags-help-scan
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Rule Flags

Scan Flags
--context string Expand context of schema classification e.g., --context=health, to include data types particular to health
--data-subject-mapping string Override default data subject mapping by providing a path to a custom mapping JSON file
--debug Enable debug logs
--disable-domain-resolution Do not attempt to resolve detected domains during classification (default true)
--domain-resolution-timeout duration Set timeout when attempting to resolve detected domains during classification, e.g. --domain-resolution-timeout=3s (default 3s)
Expand Down
1 change: 1 addition & 0 deletions integration/flags/.snapshots/TestMetadataFlags-scan-help
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Rule Flags

Scan Flags
--context string Expand context of schema classification e.g., --context=health, to include data types particular to health
--data-subject-mapping string Override default data subject mapping by providing a path to a custom mapping JSON file
--debug Enable debug logs
--disable-domain-resolution Do not attempt to resolve detected domains during classification (default true)
--domain-resolution-timeout duration Set timeout when attempting to resolve detected domains during classification, e.g. --domain-resolution-timeout=3s (default 3s)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Rule Flags

Scan Flags
--context string Expand context of schema classification e.g., --context=health, to include data types particular to health
--data-subject-mapping string Override default data subject mapping by providing a path to a custom mapping JSON file
--debug Enable debug logs
--disable-domain-resolution Do not attempt to resolve detected domains during classification (default true)
--domain-resolution-timeout duration Set timeout when attempting to resolve detected domains during classification, e.g. --domain-resolution-timeout=3s (default 3s)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Rule Flags

Scan Flags
--context string Expand context of schema classification e.g., --context=health, to include data types particular to health
--data-subject-mapping string Override default data subject mapping by providing a path to a custom mapping JSON file
--debug Enable debug logs
--disable-domain-resolution Do not attempt to resolve detected domains during classification (default true)
--domain-resolution-timeout duration Set timeout when attempting to resolve detected domains during classification, e.g. --domain-resolution-timeout=3s (default 3s)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Rule Flags

Scan Flags
--context string Expand context of schema classification e.g., --context=health, to include data types particular to health
--data-subject-mapping string Override default data subject mapping by providing a path to a custom mapping JSON file
--debug Enable debug logs
--disable-domain-resolution Do not attempt to resolve detected domains during classification (default true)
--domain-resolution-timeout duration Set timeout when attempting to resolve detected domains during classification, e.g. --domain-resolution-timeout=3s (default 3s)
Expand Down
10 changes: 9 additions & 1 deletion pkg/classification/classification.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,19 @@ func NewClassifier(config *Config) (*Classifier, error) {
return nil, err
}

// apply subject mapping override, if present
var knownPersonObjectPatterns []db.KnownPersonObjectPattern
if config.Config.Scan.DataSubjectMapping != "" {
knownPersonObjectPatterns = db.DefaultWithMapping(config.Config.Scan.DataSubjectMapping).KnownPersonObjectPatterns
} else {
knownPersonObjectPatterns = db.Default().KnownPersonObjectPatterns
}

schemaClassifier := schema.New(
schema.Config{
DataTypes: db.Default().DataTypes,
DataTypeClassificationPatterns: db.Default().DataTypeClassificationPatterns,
KnownPersonObjectPatterns: db.Default().KnownPersonObjectPatterns,
KnownPersonObjectPatterns: knownPersonObjectPatterns,
Context: config.Config.Scan.Context,
},
)
Expand Down
22 changes: 16 additions & 6 deletions pkg/classification/db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"embed"
"encoding/json"
"log"
"os"
"regexp"
"strings"

Expand Down Expand Up @@ -132,21 +133,25 @@ type KnownPersonObjectPattern struct {
}

// Default returns the DefaultDB produced by defaultDB with an empty context
// and an empty subject mapping path.
//
// NOTE(review): this rendered diff shows both versions of the call; the first
// return is the pre-change line (one-argument defaultDB) and the second is the
// post-change line matching the two-parameter defaultDB signature.
func Default() DefaultDB {
return defaultDB("")
return defaultDB("", "")
}

// DefaultWithMapping returns the DefaultDB produced by defaultDB with an empty
// context and the given subject mapping path.
//
// subjectMappingPath is forwarded unchanged; a non-empty value makes the known
// person object patterns load their mapping JSON from that path with
// os.ReadFile, while an empty value keeps the subject_mapping.json read through
// subjectMappingFile.
func DefaultWithMapping(subjectMappingPath string) DefaultDB {
	database := defaultDB("", subjectMappingPath)
	return database
}

// DefaultWithContext returns the DefaultDB produced by defaultDB for the given
// context, with an empty subject mapping path.
//
// NOTE(review): this rendered diff shows both versions of the call; the first
// return is the pre-change line (one-argument defaultDB) and the second is the
// post-change line matching the two-parameter defaultDB signature.
func DefaultWithContext(context flag.Context) DefaultDB {
return defaultDB(context)
return defaultDB(context, "")
}

// defaultDB assembles a DefaultDB from the default recipes, data types, data
// categories, data type classification patterns and known person object
// patterns.
//
// context is passed to defaultDataCategories. subjectMappingPath is passed to
// defaultKnownPersonObjectPatterns, which reads the mapping from that path when
// it is non-empty.
//
// NOTE(review): this rendered diff interleaves the pre-change and post-change
// lines: the first signature and the first KnownPersonObjectPatterns field are
// the removed versions, the second of each pair is the added version.
func defaultDB(context flag.Context) DefaultDB {
func defaultDB(context flag.Context, subjectMappingPath string) DefaultDB {
// Data types are computed once and shared by the two pattern builders below.
dataTypes := defaultDataTypes()
return DefaultDB{
Recipes: defaultRecipes(),
DataTypes: dataTypes,
DataCategories: defaultDataCategories(context),
DataTypeClassificationPatterns: defaultDataTypeClassificationPatterns(dataTypes),
KnownPersonObjectPatterns: defaultKnownPersonObjectPatterns(dataTypes),
KnownPersonObjectPatterns: defaultKnownPersonObjectPatterns(dataTypes, subjectMappingPath),
}
}

Expand Down Expand Up @@ -332,7 +337,7 @@ func defaultDataTypeClassificationPatterns(dataTypes []DataType) []DataTypeClass
return dataTypeClassificationPatterns
}

func defaultKnownPersonObjectPatterns(dataTypes []DataType) []KnownPersonObjectPattern {
func defaultKnownPersonObjectPatterns(dataTypes []DataType, subjectMappingPath string) []KnownPersonObjectPattern {
knownPersonObjectPatterns := []KnownPersonObjectPattern{}

// "Identification" > "Unique Identifier" data type
Expand All @@ -352,7 +357,12 @@ func defaultKnownPersonObjectPatterns(dataTypes []DataType) []KnownPersonObjectP
}

// read mapping
subjectMappingJson, err := subjectMappingFile.ReadFile("subject_mapping.json")
var subjectMappingJson []byte
if subjectMappingPath != "" {
subjectMappingJson, err = os.ReadFile(subjectMappingPath)
} else {
subjectMappingJson, err = subjectMappingFile.ReadFile("subject_mapping.json")
}
if err != nil {
handleError(err)
}
Expand Down
11 changes: 11 additions & 0 deletions pkg/flag/scan_flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ var (
Value: "",
Usage: "Expand context of schema classification e.g., --context=health, to include data types particular to health",
}
// DataSubjectMappingFlag is the --data-subject-mapping scan flag: a path to a
// custom mapping JSON file that overrides the default data subject mapping.
// It defaults to the empty string.
// NOTE(review): the config key uses underscores (scan.data_subject_mapping)
// while other scan keys visible in this change use hyphens (e.g.
// domain-resolution-timeout, external-rule-dir) — confirm this is intentional.
DataSubjectMappingFlag = Flag{
Name: "data-subject-mapping",
ConfigName: "scan.data_subject_mapping",
Value: "",
Usage: "Override default data subject mapping by providing a path to a custom mapping JSON file",
}
QuietFlag = Flag{
Name: "quiet",
ConfigName: "scan.quiet",
Expand Down Expand Up @@ -79,6 +85,7 @@ type ScanFlagGroup struct {
DomainResolutionTimeoutFlag *Flag
InternalDomainsFlag *Flag
ContextFlag *Flag
DataSubjectMappingFlag *Flag
QuietFlag *Flag
ForceFlag *Flag
ExternalRuleDirFlag *Flag
Expand All @@ -92,6 +99,7 @@ type ScanOptions struct {
DomainResolutionTimeout time.Duration `mapstructure:"domain-resolution-timeout" json:"domain-resolution-timeout" yaml:"domain-resolution-timeout"`
InternalDomains []string `mapstructure:"internal-domains" json:"internal-domains" yaml:"internal-domains"`
Context Context `mapstructure:"context" json:"context" yaml:"context"`
DataSubjectMapping string `mapstructure:"data_subject_mapping" json:"data_subject_mapping" yaml:"data_subject_mapping"`
Quiet bool `mapstructure:"quiet" json:"quiet" yaml:"quiet"`
Force bool `mapstructure:"force" json:"force" yaml:"force"`
ExternalRuleDir []string `mapstructure:"external-rule-dir" json:"external-rule-dir" yaml:"external-rule-dir"`
Expand All @@ -105,6 +113,7 @@ func NewScanFlagGroup() *ScanFlagGroup {
DomainResolutionTimeoutFlag: &DomainResolutionTimeoutFlag,
InternalDomainsFlag: &InternalDomainsFlag,
ContextFlag: &ContextFlag,
DataSubjectMappingFlag: &DataSubjectMappingFlag,
QuietFlag: &QuietFlag,
ForceFlag: &ForceFlag,
ExternalRuleDirFlag: &ExternalRuleDirFlag,
Expand All @@ -123,6 +132,7 @@ func (f *ScanFlagGroup) Flags() []*Flag {
f.DomainResolutionTimeoutFlag,
f.InternalDomainsFlag,
f.ContextFlag,
f.DataSubjectMappingFlag,
f.QuietFlag,
f.ForceFlag,
f.ExternalRuleDirFlag,
Expand All @@ -149,6 +159,7 @@ func (f *ScanFlagGroup) ToOptions(args []string) (ScanOptions, error) {
DomainResolutionTimeout: getDuration(f.DomainResolutionTimeoutFlag),
InternalDomains: getStringSlice(f.InternalDomainsFlag),
Context: context,
DataSubjectMapping: getString(f.DataSubjectMappingFlag),
Quiet: getBool(f.QuietFlag),
Force: getBool(f.ForceFlag),
Target: target,
Expand Down

0 comments on commit 1fea8d8

Please sign in to comment.