Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

helpers: Add option to transliterate content paths #11246

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions common/text/transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"sync"
"unicode"

"github.com/alexsergivan/transliterator"
"golang.org/x/text/language"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
Expand Down Expand Up @@ -76,3 +78,11 @@ func VisitLinesAfter(s string, fn func(line string)) {
fn(s)
}
}

// Transliterate converts the given string s from Unicode to ASCII using rules
// predefined for the given language lang.
func Transliterate(s string, lang string) string {
trans := transliterator.NewTransliterator(nil)
language, _ := language.Make(lang).Base()
return trans.Transliterate(s, language.String())
}
11 changes: 10 additions & 1 deletion common/text/transform_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@ func TestRemoveAccents(t *testing.T) {
c.Assert(string(RemoveAccentsString("Resumé")), qt.Equals, "Resume")
}

// TestTransliterate checks Transliterate against fixed inputs for the "en"
// and "de" language codes.
func TestTransliterate(t *testing.T) {
	c := qt.New(t)

	for _, tc := range []struct {
		input string
		lang  string
		want  string
	}{
		{"Hugo", "en", "Hugo"},
		{"áéíñóú", "en", "aeinou"},
		{"ÄÖÜäöüß", "en", "AOUaouss"},
		{"ÄÖÜäöüß", "de", "AeOeUeaeoeuess"},
	} {
		c.Assert(Transliterate(tc.input, tc.lang), qt.Equals, tc.want)
	}
}

func TestChomp(t *testing.T) {
c := qt.New(t)

Expand Down Expand Up @@ -63,7 +72,7 @@ line 3`
func BenchmarkVisitLinesAfter(b *testing.B) {
const lines = `line 1
line 2

line 3`

for i := 0; i < b.N; i++ {
Expand Down
3 changes: 3 additions & 0 deletions config/allconfig/allconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,9 @@ type RootConfig struct {
// Removes non-spacing marks from composite characters in content paths.
RemovePathAccents bool

// Converts content paths from Unicode to ASCII.
TransliteratePaths bool

// Whether to track and print unused templates during the build.
PrintUnusedTemplates bool

Expand Down
4 changes: 4 additions & 0 deletions config/allconfig/configlanguage.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,10 @@ func (c ConfigLanguage) RemovePathAccents() bool {
return c.config.RemovePathAccents
}

// TransliteratePaths returns the value of the TransliteratePaths setting held
// by the wrapped configuration.
func (c ConfigLanguage) TransliteratePaths() bool {
return c.config.TransliteratePaths
}

// DefaultContentLanguage returns the value of the DefaultContentLanguage
// setting held by the wrapped configuration.
func (c ConfigLanguage) DefaultContentLanguage() string {
return c.config.DefaultContentLanguage
}
Expand Down
1 change: 1 addition & 0 deletions config/allconfig/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ func (l configLoader) applyDefaultConfig() error {
"canonifyURLs": false,
"relativeURLs": false,
"removePathAccents": false,
"transliteratePaths": false,
"titleCaseStyle": "AP",
"taxonomies": maps.Params{"tag": "tags", "category": "categories"},
"permalinks": maps.Params{},
Expand Down
1 change: 1 addition & 0 deletions config/configProvider.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ type AllProvider interface {
CanonifyURLs() bool
DisablePathToLower() bool
RemovePathAccents() bool
TransliteratePaths() bool
IsUglyURLs(section string) bool
DefaultContentLanguage() string
DefaultContentLanguageInSubdir() bool
Expand Down
42 changes: 42 additions & 0 deletions docs/content/en/functions/transform/Transliterate.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
title: transform.Transliterate
description: Returns the given string, converting Unicode to ASCII.
categories: []
keywords: []
action:
aliases: []
related: []
returnType: string
signatures: [transform.Transliterate INPUT]
---

The `transform.Transliterate` function converts a string from Unicode to ASCII using rules predefined for your site's `defaultContentLanguage`, or using default rules if language-specific rules do not exist.

Hugo provides language-specific transliteration rules for Bosnian (bs), Bulgarian (bg), Catalan (ca), Croatian (hr), Danish (da), Esperanto (eo), German (de), Hungarian (hu), Macedonian (mk), Norwegian Bokmål (nb), Russian (ru), Serbian (sr), Slovenian (sl), Swedish (sv), and Ukrainian (uk).

For a site with English (en) as the default content language:

```go-html-template
{{ transform.Transliterate "Hugo" }} → Hugo
{{ transform.Transliterate "çđħłƚŧ" }} → cdhllt
{{ transform.Transliterate "áéíñóú" }} → aeinou
{{ transform.Transliterate "ÄÖÜäöüß" }} → AOUaouss
```

For a site with German (de) as the default content language:

```go-html-template
{{ transform.Transliterate "ÄÖÜäöüß" }} → AeOeUeaeoeuess
```

If you have enabled [`transliteratePaths`] in your site configuration, you can use `transform.Transliterate` with the [`GetPage`] method to retrieve term pages:

[`transliteratePaths`]: /getting-started/configuration/#transliteratepaths
[`GetPage`]: /methods/site/getpage/


```go-html-template
{{ with .Site.GetPage (path.Join "tags" (transform.Transliterate "çđħłƚŧ äöü" | anchorize)) }}
<a href="{{ .RelPermalink }}">{{ .LinkTitle }}</a>
{{ end }}
```
50 changes: 49 additions & 1 deletion docs/content/en/getting-started/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,30 @@ Hugo uses the `hugo.toml`, `hugo.yaml`, or `hugo.json` (if found in the
site root) as the default site configuration file.

The user can choose to override that default with one or more site configuration files using the command-line `--config` switch.
### transliteratePaths

**Default value:** false

Converts path characters from Unicode to ASCII using rules predefined for your site's `defaultContentLanguage`, or using default rules if language-specific rules do not exist.

Hugo provides language-specific transliteration rules for Bosnian (bs), Bulgarian (bg), Catalan (ca), Croatian (hr), Danish (da), Esperanto (eo), German (de), Hungarian (hu), Macedonian (mk), Norwegian Bokmål (nb), Russian (ru), Serbian (sr), Slovenian (sl), Swedish (sv), and Ukrainian (uk).

Hugo ignores the `removePathAccents` setting if you enable `transliteratePaths`.

For a site with English (en) as the default content language:

```text
content/Hugo.md → https://example.org/hugo/
content/çđħłƚŧ.md → https://example.org/cdhllt/
content/áéíñóú.md → https://example.org/aeinou/
content/ÄÖÜäöüß.md → https://example.org/aouaouss/
```

For a site with German (de) as the default content language:

```text
content/ÄÖÜäöüß.md → https://example.org/aeoeueaeoeuess/
```
Examples:

```txt
Expand Down Expand Up @@ -455,7 +478,7 @@ See [Configure Taxonomies](/content-management/taxonomies#configure-taxonomies).

### theme

: See [module configuration](/hugo-modules/configuration/#module-configuration-imports) for how to import a theme.
See [module configuration](/hugo-modules/configuration/#module-configuration-imports) for how to import a theme.

### themesDir

Expand Down Expand Up @@ -483,6 +506,31 @@ Site title.

See [Configure Title Case](#configure-title-case)

### transliteratePaths

**Default value:** false

Converts path characters from Unicode to ASCII using rules predefined for your site's `defaultContentLanguage`, or using default rules if language-specific rules do not exist.

Hugo provides language-specific transliteration rules for Bosnian (bs), Bulgarian (bg), Catalan (ca), Croatian (hr), Danish (da), Esperanto (eo), German (de), Hungarian (hu), Macedonian (mk), Norwegian Bokmål (nb), Russian (ru), Serbian (sr), Slovenian (sl), Swedish (sv), and Ukrainian (uk).

Hugo ignores the `removePathAccents` setting if you enable `transliteratePaths`.

For a site with English (en) as the default content language:

```text
content/Hugo.md → https://example.org/hugo/
content/çđħłƚŧ.md → https://example.org/cdhllt/
content/áéíñóú.md → https://example.org/aeinou/
content/ÄÖÜäöüß.md → https://example.org/aouaouss/
```

For a site with German (de) as the default content language:

```text
content/ÄÖÜäöüß.md → https://example.org/aeoeueaeoeuess/
```

### uglyURLs

**Default value:** false
Expand Down
11 changes: 11 additions & 0 deletions docs/data/docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1623,6 +1623,7 @@ config:
timeout: 30s
title: ""
titleCaseStyle: AP
transliteratePaths: false
uglyURLs: false
workingDir: ""
config_helpers:
Expand Down Expand Up @@ -4367,6 +4368,16 @@ tpl:
{
"title": "Hello World"
}
Transliterate:
Aliases: null
Args:
- s
Description: Transliterate converts Unicode to ASCII.
Examples:
- - '{{ "áéíñóú" | transform.Transliterate }}'
- aeinou
- - '{{ "çđħłƚŧ" | transform.Transliterate }}'
- cdhllt
Unmarshal:
Aliases:
- unmarshal
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ require (
github.com/BurntSushi/locker v0.0.0-20171006230638-a6e239ea1c69
github.com/PuerkitoBio/purell v1.1.1
github.com/alecthomas/chroma/v2 v2.11.1
github.com/alexsergivan/transliterator v1.0.0
github.com/armon/go-radix v1.0.0
github.com/aws/aws-sdk-go v1.48.6
github.com/bep/clocks v0.5.0
Expand Down
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ github.com/alecthomas/assert/v2 v2.2.1 h1:XivOgYcduV98QCahG8T5XTezV5bylXe+lBxLG2
github.com/alecthomas/chroma/v2 v2.11.1 h1:m9uUtgcdAwgfFNxuqj7AIG75jD2YmL61BBIJWtdzJPs=
github.com/alecthomas/chroma/v2 v2.11.1/go.mod h1:4TQu7gdfuPjSh76j78ietmqh9LiurGF0EpseFXdKMBw=
github.com/alecthomas/repr v0.2.0 h1:HAzS41CIzNW5syS8Mf9UwXhNH1J9aix/BvDRf1Ml2Yk=
github.com/alexsergivan/transliterator v1.0.0 h1:SAA+fkGZKLnak47h8Dr6829IE2kpSZR2Y3yTd69cIwY=
github.com/alexsergivan/transliterator v1.0.0/go.mod h1:0IrumukulURJ4PD0z6UcdJKP2job1DYDhnHAP5y+5pE=
github.com/armon/go-radix v1.0.0 h1:F4z6KzEeeQIMeLFa97iZU6vupzoecKdU5TX24SNppXI=
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/aws/aws-sdk-go v1.48.6 h1:hnL/TE3eRigirDLrdRE9AWE1ALZSVLAsC4wK8TGsMqk=
Expand Down Expand Up @@ -320,6 +322,7 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v0.0.0-20161117074351-18a02ba4a312/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
Expand Down
53 changes: 53 additions & 0 deletions helpers/integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright 2022 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package helpers_test

import (
"testing"

"github.com/gohugoio/hugo/hugolib"
)

// TestTransliterate builds a small site with transliteratePaths enabled and
// disablePathToLower set, then checks that each content file is published
// under the expected path with its original title as content.
func TestTransliterate(t *testing.T) {
	t.Parallel()

	const siteFiles = `
-- config.toml --
disableKinds = ['RSS','sitemap','taxonomy','term']
disablePathToLower = true
removePathAccents = false
transliteratePaths = true
-- content/Hugo.md --
---
title: Hugo
---
-- content/áéíñóú.md --
---
title: Áéíñóú
---
-- content/çđħłƚŧ.md --
---
title: Çđħłƚŧ
---
-- layouts/_default/single.html --
{{ .Title }}
`

	b := hugolib.Test(t, siteFiles)

	// Published file paired with the text it must contain.
	for _, want := range []struct {
		filename string
		content  string
	}{
		{"public/Hugo/index.html", "Hugo"},
		{"public/aeinou/index.html", "Áéíñóú"},
		{"public/cdhllt/index.html", "Çđħłƚŧ"},
	} {
		b.AssertFileContent(want.filename, want.content)
	}
}
14 changes: 10 additions & 4 deletions helpers/path.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,19 @@ func ishex(c rune) bool {

// UnicodeSanitize sanitizes string to be used in Hugo URL's, allowing only
// a predefined set of special Unicode characters.
// If RemovePathAccents configuration flag is enabled, Unicode accents
// are also removed.
// If the TransliteratePaths configuration flag is enabled, Unicode characters
// will be converted to ASCII.
// If the RemovePathAccents configuration flag is enabled, non-spacing marks in
// composite characters are removed.
// Hyphens in the original input are maintained.
// Spaces will be replaced with a single hyphen, and sequential replacement hyphens will be reduced to one.
func (p *PathSpec) UnicodeSanitize(s string) string {
if p.Cfg.RemovePathAccents() {
s = text.RemoveAccentsString(s)
if p.Cfg.TransliteratePaths() {
s = text.Transliterate(s, p.Cfg.DefaultContentLanguage())
} else {
if p.Cfg.RemovePathAccents() {
s = text.RemoveAccentsString(s)
}
}

source := []rune(s)
Expand Down
8 changes: 8 additions & 0 deletions tpl/transform/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,14 @@ func init() {
},
)

ns.AddMethodMapping(ctx.Transliterate,
nil,
[][2]string{
{`{{ transform.Transliterate "áéíñóú" }}`, `aeinou`},
{`{{ transform.Transliterate "çđħłƚŧ" }}`, `cdhllt`},
},
)

ns.AddMethodMapping(ctx.Unmarshal,
[]string{"unmarshal"},
[][2]string{
Expand Down
25 changes: 25 additions & 0 deletions tpl/transform/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ disableKinds = ['page','rss','section','sitemap','taxonomy','term']
-- layouts/index.html --
{{ highlight "a" "b" 0 }}
`

b := hugolib.NewIntegrationTestBuilder(
hugolib.IntegrationTestConfig{
T: t,
Expand All @@ -107,3 +108,27 @@ disableKinds = ['page','rss','section','sitemap','taxonomy','term']
_, err := b.BuildE()
b.Assert(err.Error(), qt.Contains, "error calling highlight: invalid Highlight option: 0")
}

// TestTransliterate renders transform.Transliterate calls on a site whose
// defaultContentLanguage is 'de' and checks the rendered home page.
func TestTransliterate(t *testing.T) {
	t.Parallel()

	const siteFiles = `
-- config.toml --
disableKinds = ['RSS','sitemap','taxonomy','term']
defaultContentLanguage = 'de'
-- layouts/index.html --
{{ transform.Transliterate "Hugo" }}
{{ transform.Transliterate "áéíñóú" }}
{{ transform.Transliterate "çđħłƚŧ" }}
{{ transform.Transliterate "ÄÖÜäöüß" }}
`

	// Expected output, one line per template call above.
	const wantOutput = `
Hugo
aeinou
cdhllt
AeOeUeaeoeuess
`

	hugolib.Test(t, siteFiles).AssertFileContent("public/index.html", wantOutput)
}
Loading
Loading