Skip to content

Commit

Permalink
perf(mangling): optimized the processing of initialisms
Browse files Browse the repository at this point in the history
This PR significantly improves the performance of name mangling
utilities (e.g. ToGoName, etc).

The need for this occured while benchmarking go-swagger's CI suite:
surprisingly, the topmost allocator was swag.ToGoName.

It is the result of a dozen successive optimization passes driven
by profiling.

These functions now execute ~10x faster and need 100 times less
memory allocations. See BENCHMARK.md

Optimization techniques used to reduce allocations:
* pointer -> value (everything was pointers: now everything is values)
* interface -> struct with a Kind field
* closure -> func
* string to []byte: use unsafe conversion
* string concatenation -> use bytes.Buffer (recyclable, unlike
strings.Builder)
* static values converted over and over again: pre-bake the conversions
* var x []T  -> make([]T, 0, heuristic size)
* temporarily allocated values -> use pool to recycle previously
allocated data items

Optimization techniques used to reduce CPU:
* read unicode rune -> short-circuit for single byte runes
* map lookup -> func with switch statement
* for i, v := range -> for i := range (minor impact)

Signed-off-by: Frédéric BIDON <fredbi@yahoo.com>
  • Loading branch information
fredbi committed Jan 11, 2024
1 parent b3e7a53 commit 78a9375
Show file tree
Hide file tree
Showing 11 changed files with 820 additions and 279 deletions.
52 changes: 52 additions & 0 deletions BENCHMARK.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Benchmarks

## Name mangling utilities

```bash
go test -bench XXX -run XXX -benchtime 30s
```

### Benchmarks at b3e7a5386f996177e4808f11acb2aa93a0f660df

```
goos: linux
goarch: amd64
pkg: github.com/go-openapi/swag
cpu: Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz
BenchmarkToXXXName/ToGoName-4 862623 44101 ns/op 10450 B/op 732 allocs/op
BenchmarkToXXXName/ToVarName-4 853656 40728 ns/op 10468 B/op 734 allocs/op
BenchmarkToXXXName/ToFileName-4 1268312 27813 ns/op 9785 B/op 617 allocs/op
BenchmarkToXXXName/ToCommandName-4 1276322 27903 ns/op 9785 B/op 617 allocs/op
BenchmarkToXXXName/ToHumanNameLower-4 895334 40354 ns/op 10472 B/op 731 allocs/op
BenchmarkToXXXName/ToHumanNameTitle-4 882441 40678 ns/op 10566 B/op 749 allocs/op
```

### Benchmarks after PR #79

~ x10 performance improvement and ~ /100 memory allocations.

```
goos: linux
goarch: amd64
pkg: github.com/go-openapi/swag
cpu: Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz
BenchmarkToXXXName/ToGoName-4 9595830 3991 ns/op 42 B/op 5 allocs/op
BenchmarkToXXXName/ToVarName-4 9194276 3984 ns/op 62 B/op 7 allocs/op
BenchmarkToXXXName/ToFileName-4 17002711 2123 ns/op 147 B/op 7 allocs/op
BenchmarkToXXXName/ToCommandName-4 16772926 2111 ns/op 147 B/op 7 allocs/op
BenchmarkToXXXName/ToHumanNameLower-4 9788331 3749 ns/op 92 B/op 6 allocs/op
BenchmarkToXXXName/ToHumanNameTitle-4 9188260 3941 ns/op 104 B/op 6 allocs/op
```

```
goos: linux
goarch: amd64
pkg: github.com/go-openapi/swag
cpu: AMD Ryzen 7 5800X 8-Core Processor
BenchmarkToXXXName/ToGoName-16 18527378 1972 ns/op 42 B/op 5 allocs/op
BenchmarkToXXXName/ToVarName-16 15552692 2093 ns/op 62 B/op 7 allocs/op
BenchmarkToXXXName/ToFileName-16 32161176 1117 ns/op 147 B/op 7 allocs/op
BenchmarkToXXXName/ToCommandName-16 32256634 1137 ns/op 147 B/op 7 allocs/op
BenchmarkToXXXName/ToHumanNameLower-16 18599661 1946 ns/op 92 B/op 6 allocs/op
BenchmarkToXXXName/ToHumanNameTitle-16 17581353 2054 ns/op 105 B/op 6 allocs/op
```
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@ module github.com/go-openapi/swag

require (
github.com/mailru/easyjson v0.7.7
github.com/pkg/profile v1.7.0
github.com/stretchr/testify v1.8.0
gopkg.in/yaml.v3 v3.0.1
)

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/felixge/fgprof v0.9.3 // indirect
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
Expand Down
11 changes: 11 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g=
github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw=
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd h1:1FjCyPC+syAzJ5/2S8fqdZK1R22vvA0J7JZKcuOIQ7Y=
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg=
github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
Expand All @@ -12,13 +20,16 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/pkg/profile v1.7.0 h1:hnbDkaNWPCLMO9wGLdBFTIZvzDrDfBM2072E1S9gJkA=
github.com/pkg/profile v1.7.0/go.mod h1:8Uer0jas47ZQMJ7VD+OHknK4YDY07LPUC6dEvqDjvNo=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
Expand Down
137 changes: 137 additions & 0 deletions initialism_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,130 @@ package swag

import (
"sort"
"strings"
"sync"
)

var (
// commonInitialisms are common acronyms that are kept as whole uppercased words.
commonInitialisms *indexOfInitialisms

// initialisms is a slice of sorted initialisms
initialisms []string

// a copy of initialisms pre-baked as []rune
initialismsRunes [][]rune
initialismsUpperCased [][]rune

isInitialism func(string) bool

maxAllocMatches int
)

func init() {
// Taken from https://github.com/golang/lint/blob/3390df4df2787994aea98de825b964ac7944b817/lint.go#L732-L769
configuredInitialisms := map[string]bool{
"ACL": true,
"API": true,
"ASCII": true,
"CPU": true,
"CSS": true,
"DNS": true,
"EOF": true,
"GUID": true,
"HTML": true,
"HTTPS": true,
"HTTP": true,
"ID": true,
"IP": true,
"IPv4": true,
"IPv6": true,
"JSON": true,
"LHS": true,
"OAI": true,
"QPS": true,
"RAM": true,
"RHS": true,
"RPC": true,
"SLA": true,
"SMTP": true,
"SQL": true,
"SSH": true,
"TCP": true,
"TLS": true,
"TTL": true,
"UDP": true,
"UI": true,
"UID": true,
"UUID": true,
"URI": true,
"URL": true,
"UTF8": true,
"VM": true,
"XML": true,
"XMPP": true,
"XSRF": true,
"XSS": true,
}

// a thread-safe index of initialisms
commonInitialisms = newIndexOfInitialisms().load(configuredInitialisms)
initialisms = commonInitialisms.sorted()
initialismsRunes = asRunes(initialisms)
initialismsUpperCased = asUpperCased(initialisms)
maxAllocMatches = maxAllocHeuristic(initialismsRunes)

// a test function
isInitialism = commonInitialisms.isInitialism
}

func asRunes(in []string) [][]rune {
out := make([][]rune, len(in))
for i, initialism := range in {
out[i] = []rune(initialism)
}

return out
}

func asUpperCased(in []string) [][]rune {
out := make([][]rune, len(in))

for i, initialism := range in {
out[i] = []rune(upper(trim(initialism)))
}

return out
}

func maxAllocHeuristic(in [][]rune) int {
heuristic := make(map[rune]int)
for _, initialism := range in {
heuristic[initialism[0]]++
}

var maxAlloc int
for _, val := range heuristic {
if val > maxAlloc {
maxAlloc = val
}
}

return maxAlloc
}

// AddInitialisms add additional initialisms
func AddInitialisms(words ...string) {
for _, word := range words {
// commonInitialisms[upper(word)] = true
commonInitialisms.add(upper(word))
}
// sort again
initialisms = commonInitialisms.sorted()
initialismsRunes = asRunes(initialisms)
initialismsUpperCased = asUpperCased(initialisms)
}

// indexOfInitialisms is a thread-safe implementation of the sorted index of initialisms.
// Since go1.9, this may be implemented with sync.Map.
type indexOfInitialisms struct {
Expand Down Expand Up @@ -63,3 +184,19 @@ func (m *indexOfInitialisms) sorted() (result []string) {
sort.Sort(sort.Reverse(byInitialism(result)))
return
}

type byInitialism []string

func (s byInitialism) Len() int {
return len(s)
}
func (s byInitialism) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
func (s byInitialism) Less(i, j int) bool {
if len(s[i]) != len(s[j]) {
return len(s[i]) < len(s[j])
}

return strings.Compare(s[i], s[j]) > 0
}
70 changes: 38 additions & 32 deletions name_lexem.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,74 +14,80 @@

package swag

import "unicode"
import (
"unicode"
"unicode/utf8"
)

type (
nameLexem interface {
GetUnsafeGoName() string
GetOriginal() string
IsInitialism() bool
}
lexemKind uint8

initialismNameLexem struct {
nameLexem struct {
original string
matchedInitialism string
kind lexemKind
}
)

casualNameLexem struct {
original string
}
const (
lexemKindCasualName lexemKind = iota
lexemKindInitialismName
)

func newInitialismNameLexem(original, matchedInitialism string) *initialismNameLexem {
return &initialismNameLexem{
func newInitialismNameLexem(original, matchedInitialism string) nameLexem {
return nameLexem{
kind: lexemKindInitialismName,
original: original,
matchedInitialism: matchedInitialism,
}
}

func newCasualNameLexem(original string) *casualNameLexem {
return &casualNameLexem{
func newCasualNameLexem(original string) nameLexem {
return nameLexem{
kind: lexemKindCasualName,
original: original,
}
}

func (l *initialismNameLexem) GetUnsafeGoName() string {
return l.matchedInitialism
}
func (l nameLexem) GetUnsafeGoName() string {
if l.kind == lexemKindInitialismName {
return l.matchedInitialism
}

var (
first rune
rest string
)

func (l *casualNameLexem) GetUnsafeGoName() string {
var first rune
var rest string
for i, orig := range l.original {
if i == 0 {
first = orig
continue
}

if i > 0 {
rest = l.original[i:]
break
}
}

if len(l.original) > 1 {
return string(unicode.ToUpper(first)) + lower(rest)
b := poolOfBuffers.BorrowBuffer(utf8.UTFMax + len(rest))
defer func() {
poolOfBuffers.RedeemBuffer(b)
}()
b.WriteRune(unicode.ToUpper(first))
b.WriteString(lower(rest))
return b.String()
}

return l.original
}

func (l *initialismNameLexem) GetOriginal() string {
func (l nameLexem) GetOriginal() string {
return l.original
}

func (l *casualNameLexem) GetOriginal() string {
return l.original
}

func (l *initialismNameLexem) IsInitialism() bool {
return true
}

func (l *casualNameLexem) IsInitialism() bool {
return false
func (l nameLexem) IsInitialism() bool {
return l.kind == lexemKindInitialismName
}
Loading

0 comments on commit 78a9375

Please sign in to comment.