Skip to content

Commit

Permalink
feat: Allow users to override comment delimiters (#900)
Browse files Browse the repository at this point in the history
Allow users to specify the comment delimiters they are using in their
documentation. Vale replaces these with HTML comment tags before
linting, making it possible to control style rules for specific passages
of prose in file formats that use non-HTML comment syntax. This is
critical for controlling style rules within a page in MDX, and
potentially other formats as well.

This example configures the `CommentDelimiters` field for `*.md` files,
indicating that `{/*` and `*/}` are the custom comment delimiters:

```ini
[*.md]
CommentDelimiters = "{/*,*/}"
```

Internally, custom delimiters are represented as a `[2]string`, and it
is only possible to configure one set of custom comment delimiters for a
given format block.

More specific changes:
- Refactor `applyPatterns`. Remove the method receiver and take only the
  necessary fields of `*core.Config` as parameters. This makes it easier
  to test `applyPatterns` without mocking an entire `*core.Config`. Also
  extract functions for `applyInlinePatterns` and `applyBlockPatterns`
  so we can use fewer arguments in a single function.
- Add `applyCommentPatterns`, which works similarly to
  `applyInlinePatterns` and `applyBlockPatterns`, but for substituting
  comments.
- For tests, import `https://github.com/stretchr/testify/assert`, a
  popular testing library, to get richer test output.

Closes #762
  • Loading branch information
ptgott authored Oct 9, 2024
1 parent b05b065 commit b6df01b
Show file tree
Hide file tree
Showing 8 changed files with 400 additions and 52 deletions.
6 changes: 5 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ require (
github.com/errata-ai/ini v1.63.0
github.com/errata-ai/regexp2 v1.7.0
github.com/gobwas/glob v0.2.3
github.com/jdkato/go-tree-sitter-julia v0.1.0
github.com/jdkato/twine v0.10.1
github.com/karrick/godirwalk v1.16.1
github.com/mholt/archiver/v3 v3.5.1
Expand All @@ -23,6 +24,7 @@ require (
github.com/remeh/sizedwaitgroup v1.0.0
github.com/smacker/go-tree-sitter v0.0.0-20240514083259-c5d1f3f5f99e
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.8.4
github.com/yuin/goldmark v1.5.6
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
golang.org/x/net v0.23.0
Expand All @@ -38,13 +40,13 @@ require (
github.com/Masterminds/semver/v3 v3.2.0 // indirect
github.com/andybalholm/brotli v1.0.1 // indirect
github.com/containerd/console v1.0.3 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/uuid v1.1.1 // indirect
github.com/gookit/color v1.5.4 // indirect
github.com/huandu/xstrings v1.3.3 // indirect
github.com/imdario/mergo v0.3.11 // indirect
github.com/jdkato/go-tree-sitter-julia v0.1.0 // indirect
github.com/klauspost/compress v1.11.4 // indirect
github.com/klauspost/pgzip v1.2.5 // indirect
github.com/kr/pretty v0.3.0 // indirect
Expand All @@ -55,6 +57,7 @@ require (
github.com/montanaflynn/stats v0.7.1 // indirect
github.com/nwaples/rardecode v1.1.0 // indirect
github.com/pierrec/lz4/v4 v4.1.2 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rivo/uniseg v0.4.4 // indirect
github.com/shopspring/decimal v1.2.0 // indirect
github.com/spf13/cast v1.3.1 // indirect
Expand All @@ -65,4 +68,5 @@ require (
golang.org/x/term v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
gopkg.in/neurosnap/sentences.v1 v1.0.7 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,6 @@ github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4
github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/imdario/mergo v0.3.11 h1:3tnifQM4i+fbajXKBHXWEH+KvNHqojZ778UH75j3bGA=
github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/jdkato/go-tree-sitter-julia v0.0.0-20240531060609-b738d045ba2d h1:nc/Dgjp4Zr3drV44bz2+fUaCb1ZZvFtNnudyuaWL7uQ=
github.com/jdkato/go-tree-sitter-julia v0.0.0-20240531060609-b738d045ba2d/go.mod h1:lXNEZorcvU63DcANEklLMbDRjwam4VQ44MIV1Cck0w8=
github.com/jdkato/go-tree-sitter-julia v0.1.0 h1:z+6zTbd6PHMKAge7GJx9QIwPQX2NOKb4Pj5jteJvaYY=
github.com/jdkato/go-tree-sitter-julia v0.1.0/go.mod h1:lXNEZorcvU63DcANEklLMbDRjwam4VQ44MIV1Cck0w8=
github.com/jdkato/twine v0.10.1 h1:Jexy1dua9nRyr45AQ3Bml1nCVYq3VIi9g09MOkg2Wwk=
Expand Down
44 changes: 23 additions & 21 deletions internal/core/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,27 +175,28 @@ type CLIFlags struct {
// Config holds the configuration values from both the CLI and `.vale.ini`.
type Config struct {
// General configuration
BlockIgnores map[string][]string // A list of blocks to ignore
Checks []string // All checks to load
Formats map[string]string // A map of unknown -> known formats
Asciidoctor map[string]string // A map of asciidoctor attributes
FormatToLang map[string]string // A map of format to lang ID
GBaseStyles []string // Global base style
GChecks map[string]bool // Global checks
IgnoredClasses []string // A list of HTML classes to ignore
IgnoredScopes []string // A list of HTML tags to ignore
MinAlertLevel int // Lowest alert level to display
Vocab []string // The active project
RuleToLevel map[string]string // Single-rule level changes
SBaseStyles map[string][]string // Syntax-specific base styles
SChecks map[string]map[string]bool // Syntax-specific checks
SkippedScopes []string // A list of HTML blocks to ignore
Stylesheets map[string]string // XSLT stylesheet
TokenIgnores map[string][]string // A list of tokens to ignore
WordTemplate string // The template used in YAML -> regexp list conversions
RootINI string // the path to the project's .vale.ini file
Paths []string // A list of paths to search for styles
ConfigFiles []string // A list of configuration files to load
BlockIgnores map[string][]string // A list of blocks to ignore
Checks []string // All checks to load
Formats map[string]string // A map of unknown -> known formats
Asciidoctor map[string]string // A map of asciidoctor attributes
FormatToLang map[string]string // A map of format to lang ID
GBaseStyles []string // Global base style
GChecks map[string]bool // Global checks
IgnoredClasses []string // A list of HTML classes to ignore
IgnoredScopes []string // A list of HTML tags to ignore
MinAlertLevel int // Lowest alert level to display
Vocab []string // The active project
RuleToLevel map[string]string // Single-rule level changes
SBaseStyles map[string][]string // Syntax-specific base styles
SChecks map[string]map[string]bool // Syntax-specific checks
SkippedScopes []string // A list of HTML blocks to ignore
Stylesheets map[string]string // XSLT stylesheet
TokenIgnores map[string][]string // A list of tokens to ignore
CommentDelimiters map[string][2]string // Strings to treat as comment delimiters. Indicates the start and end delimiters.
WordTemplate string // The template used in YAML -> regexp list conversions
RootINI string // the path to the project's .vale.ini file
Paths []string // A list of paths to search for styles
ConfigFiles []string // A list of configuration files to load

AcceptedTokens []string `json:"-"` // Project-specific vocabulary (okay)
RejectedTokens []string `json:"-"` // Project-specific vocabulary (avoid)
Expand Down Expand Up @@ -229,6 +230,7 @@ func NewConfig(flags *CLIFlags) (*Config, error) {
cfg.SecToPat = make(map[string]glob.Glob)
cfg.Stylesheets = make(map[string]string)
cfg.TokenIgnores = make(map[string][]string)
cfg.CommentDelimiters = make(map[string][2]string)
cfg.FormatToLang = make(map[string]string)
cfg.Paths = []string{}
cfg.ConfigFiles = []string{}
Expand Down
14 changes: 14 additions & 0 deletions internal/core/ini.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,20 @@ var syntaxOpts = map[string]func(string, *ini.Section, *Config) error{
cfg.BlockIgnores[label] = mergeValues(sec.Key("BlockIgnores").StringsWithShadows(","))
return nil
},
"CommentDelimiters": func(label string, sec *ini.Section, cfg *Config) error { //nolint:unparam

Check failure on line 113 in internal/core/ini.go

View workflow job for this annotation

GitHub Actions / lint

directive `//nolint:unparam` is unused for linter "unparam" (nolintlint)
d := mergeValues(sec.Key("CommentDelimiters").StringsWithShadows(","))
if len(d) != 2 {
return NewE201FromTarget(
fmt.Sprintf("CommentDelimiters must be a comma-separated list of two delimiters, but got %v items", len(d)),
label,
cfg.Flags.Path)
}
var c [2]string
c[0], c[1] = d[0], d[1]
cfg.CommentDelimiters[label] = c
return nil

},
"TokenIgnores": func(label string, sec *ini.Section, cfg *Config) error { //nolint:unparam
cfg.TokenIgnores[label] = mergeValues(sec.Key("TokenIgnores").StringsWithShadows(","))
return nil
Expand Down
96 changes: 96 additions & 0 deletions internal/core/ini_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package core

import (
"testing"

"github.com/stretchr/testify/assert"
)

// Test_processConfig_commentDelimiters checks that a syntax-specific
// CommentDelimiters entry is parsed into Config.CommentDelimiters as a
// [2]string keyed by the section label, and that the map stays empty when the
// option is not set.
func Test_processConfig_commentDelimiters(t *testing.T) {
	cases := []struct {
		description string
		body        string
		expected    map[string][2]string
	}{
		{
			description: "custom comment delimiters for markdown",
			body: `[*.md]
CommentDelimiters = "{/*,*/}"
`,
			expected: map[string][2]string{
				"*.md": {"{/*", "*/}"},
			},
		},
		{
			description: "not set",
			body: `[*.md]
TokenIgnores = (\$+[^\n$]+\$+)
`,
			expected: map[string][2]string{},
		},
	}

	for _, tc := range cases {
		t.Run(tc.description, func(t *testing.T) {
			// Each setup step feeds the next one, so stop at the first
			// failure instead of continuing with a possibly nil value.
			uCfg, err := shadowLoad([]byte(tc.body))
			if !assert.NoError(t, err) {
				return
			}
			conf, err := NewConfig(&CLIFlags{})
			if !assert.NoError(t, err) {
				return
			}
			_, err = processConfig(uCfg, conf, false)
			if !assert.NoError(t, err) {
				return
			}
			assert.Equal(t, tc.expected, conf.CommentDelimiters)
		})
	}
}

// Test_processConfig_commentDelimiters_error checks that processConfig rejects
// invalid CommentDelimiters settings: use in the global [*] section, and any
// number of delimiters other than two (whether given in one value or
// accumulated across repeated sections).
func Test_processConfig_commentDelimiters_error(t *testing.T) {
	cases := []struct {
		description string
		body        string
		expectedErr string
	}{
		{
			description: "global custom comment delimiters",
			body: `[*]
CommentDelimiters = "{/*,*/}"
`,
			expectedErr: "syntax-specific option",
		},
		{
			description: "more than two delimiters",
			body: `[*.md]
CommentDelimiters = "{/*,*/},<<,>>"
`,
			expectedErr: "CommentDelimiters must be a comma-separated list of two delimiters, but got 4 items",
		},
		{
			description: "more than two delimiters (shadow)",
			body: `[*.md]
CommentDelimiters = "{/*,*/}"
[*.md]
CommentDelimiters = "<<,>>"
`,
			expectedErr: "CommentDelimiters must be a comma-separated list of two delimiters, but got 4 items",
		},
		{
			// The value holds a single delimiter (no comma), so exactly one
			// item is parsed; the label says what the input actually is.
			description: "only one delimiter",
			body: `[*.md]
CommentDelimiters = "{/*"
`,
			expectedErr: "CommentDelimiters must be a comma-separated list of two delimiters, but got 1 items",
		},
	}

	for _, tc := range cases {
		t.Run(tc.description, func(t *testing.T) {
			// Setup must succeed before the error under test can be checked;
			// stop early rather than passing a possibly nil value onwards.
			uCfg, err := shadowLoad([]byte(tc.body))
			if !assert.NoError(t, err) {
				return
			}
			conf, err := NewConfig(&CLIFlags{})
			if !assert.NoError(t, err) {
				return
			}
			_, err = processConfig(uCfg, conf, false)
			assert.ErrorContains(t, err, tc.expectedErr)
		})
	}
}
106 changes: 90 additions & 16 deletions internal/lint/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,25 +24,40 @@ func (l *Linter) lintHTML(f *core.File) error {
return l.lintHTMLTokens(f, []byte(f.Content), 0)
}

func (l *Linter) applyPatterns(f *core.File, block, inline string) (string, error) {
// extensionConfig carries the two file extensions that the pattern helpers
// match section globs against.
type extensionConfig struct {
	// Normed is the extension used to look up the per-format delimiter
	// templates (for example ".md" or ".rst").
	Normed string
	// Real is the second extension tried when matching a section glob.
	// NOTE(review): presumably the file's on-disk extension — confirm
	// against the caller that builds this value.
	Real string
}

var blockDelimiters map[string]string = map[string]string{

Check warning on line 31 in internal/lint/html.go

View workflow job for this annotation

GitHub Actions / lint

var-declaration: should omit type map[string]string from declaration of var blockDelimiters; it will be inferred from the right-hand side (revive)
".adoc": "\n----\n$1\n----\n",
".md": "\n```\n$1\n```\n",
".rst": "\n::\n\n%s\n",
".org": orgExample,
}

func applyBlockPatterns(c *core.Config, exts extensionConfig, content string) (string, error) {
block, ok := blockDelimiters[exts.Normed]
if !ok {
return content, fmt.Errorf("ignore patterns are not supported in '%s' files", exts.Normed)
}

// TODO: Should we assume this?
s := reFrontMatter.ReplaceAllString(f.Content, block)
s := reFrontMatter.ReplaceAllString(content, block)

exts := []string{f.NormedExt, f.RealExt}
for syntax, regexes := range l.Manager.Config.BlockIgnores {
for syntax, regexes := range c.BlockIgnores {
sec, err := glob.Compile(syntax)
if err != nil {
return s, err
} else if sec.MatchAny(exts) {
} else if sec.Match(exts.Normed) || sec.Match(exts.Real) {
for _, r := range regexes {
pat, errc := regexp2.CompileStd(r)
if errc != nil { //nolint:gocritic
return s, core.NewE201FromTarget(
errc.Error(),
r,
l.Manager.Config.Flags.Path,
c.Flags.Path,
)
} else if strings.HasSuffix(f.NormedExt, ".rst") {
} else if strings.HasSuffix(exts.Normed, ".rst") {
// HACK: We need to add padding for the literal block.
for _, c := range pat.FindAllStringSubmatch(s, -1) {
sec := fmt.Sprintf(block, core.Indent(c[0], " "))
Expand All @@ -54,39 +69,98 @@ func (l *Linter) applyPatterns(f *core.File, block, inline string) (string, erro
return s, core.NewE201FromTarget(
err.Error(),
r,
l.Manager.Config.Flags.Path,
c.Flags.Path,
)
}
}
}
}
}
return s, nil
}

// inlineDelimiters maps a normalized file extension to the replacement
// template that applyInlinePatterns passes to the regexp Replace call for
// ignored tokens in that format. An extension missing from this map means
// inline ignore patterns are not supported for it.
var inlineDelimiters = map[string]string{
	".adoc": "`$1`",
	".md":   "`$1`",
	".rst":  "``$1``",
	".org":  "=$1=",
}

func applyInlinePatterns(c *core.Config, exts extensionConfig, content string) (string, error) {
inline, ok := inlineDelimiters[exts.Normed]
if !ok {
return content, fmt.Errorf("ignore patterns are not supported in '%s' files", exts.Normed)
}

for syntax, regexes := range l.Manager.Config.TokenIgnores {
for syntax, regexes := range c.TokenIgnores {
sec, err := glob.Compile(syntax)
if err != nil {
return s, err
} else if sec.MatchAny(exts) {
return content, err
} else if sec.Match(exts.Normed) || sec.Match(exts.Real) {
for _, r := range regexes {
pat, errc := regexp2.CompileStd(r)
if errc != nil {
return s, core.NewE201FromTarget(
return content, core.NewE201FromTarget(
errc.Error(),
r,
l.Manager.Config.Flags.Path,
c.Flags.Path,
)
}
s, err = pat.Replace(s, inline, 0, -1)
content, err = pat.Replace(content, inline, 0, -1)
if err != nil {
return s, core.NewE201FromTarget(
return content, core.NewE201FromTarget(
err.Error(),
r,
l.Manager.Config.Flags.Path,
c.Flags.Path,
)
}
}
}
}
return content, nil
}

// applyCommentPatterns replaces any custom comment delimiters with HTML comment
// tags based on the user configuration. This makes it possible to apply
// comment-based controls using custom comment delimiters.
func applyCommentPatterns(c *core.Config, exts extensionConfig, content string) (string, error) {
for syntax, delims := range c.CommentDelimiters {
sec, err := glob.Compile(syntax)
if err != nil {
return content, err
} else if sec.Match(exts.Normed) || sec.Match(exts.Real) {
// This field was not assigned, so do nothing.
if delims[0] == "" && delims[1] == "" {
return content, nil
}
// Return an error if only one delimiter is configured
if (delims[0] == "" && delims[1] != "") || (delims[0] != "" && delims[1] == "") {
return content, fmt.Errorf("CommentDelimiters must be empty or have two values")
}

content = strings.ReplaceAll(content, delims[0], "<!--")
content = strings.ReplaceAll(content, delims[1], "-->")

Check failure on line 143 in internal/lint/html.go

View workflow job for this annotation

GitHub Actions / lint

unnecessary trailing newline (whitespace)
}
}
return content, nil
}

// applyPatterns runs content through the block, inline, and comment
// substitutions, in that order, feeding each stage the output of the previous
// one. It stops at the first stage that fails and returns that stage's string
// result together with its error.
func applyPatterns(c *core.Config, exts extensionConfig, content string) (string, error) {
	stages := []func(*core.Config, extensionConfig, string) (string, error){
		applyBlockPatterns,
		applyInlinePatterns,
		applyCommentPatterns,
	}

	out := content
	for _, stage := range stages {
		var err error
		if out, err = stage(c, exts, out); err != nil {
			return out, err
		}
	}
	return out, nil
}
Expand Down
Loading

0 comments on commit b6df01b

Please sign in to comment.