Skip to content

Commit

Permalink
parser/pageparser: Don't store the byte slices
Browse files Browse the repository at this point in the history
On its own this change doesn't do any magic, but this is part of a bigger picture about making Hugo leaner in the
memory usage department.
  • Loading branch information
bep committed Jul 9, 2022
1 parent 72b0ccd commit 223bf28
Show file tree
Hide file tree
Showing 13 changed files with 385 additions and 198 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ jobs:
- if: matrix.os == 'windows-latest'
run: |
choco install pandoc
choco install mingw --version 10.2.0 --allow-downgrade
- run: pandoc -v
- if: matrix.os == 'ubuntu-latest'
name: Install dart-sass-embedded Linux
Expand Down
24 changes: 12 additions & 12 deletions hugolib/page.go
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ func (p *pageState) mapContentForResult(
if fe, ok := err.(herrors.FileError); ok {
return fe
}
return p.parseError(err, iter.Input(), i.Pos)
return p.parseError(err, result.Input(), i.Pos())
}

// the parser is guaranteed to return items in proper order or fail, so …
Expand All @@ -656,14 +656,14 @@ Loop:
case it.Type == pageparser.TypeIgnore:
case it.IsFrontMatter():
f := pageparser.FormatFromFrontMatterType(it.Type)
m, err := metadecoders.Default.UnmarshalToMap(it.Val, f)
m, err := metadecoders.Default.UnmarshalToMap(it.Val(result.Input()), f)
if err != nil {
if fe, ok := err.(herrors.FileError); ok {
pos := fe.Position()
// Apply the error to the content file.
pos.Filename = p.File().Filename()
// Offset the starting position of front matter.
offset := iter.LineNumber() - 1
offset := iter.LineNumber(result.Input()) - 1
if f == metadecoders.YAML {
offset -= 1
}
Expand All @@ -687,7 +687,7 @@ Loop:

next := iter.Peek()
if !next.IsDone() {
p.source.posMainContent = next.Pos
p.source.posMainContent = next.Pos()
}

if !p.s.shouldBuild(p) {
Expand All @@ -699,10 +699,10 @@ Loop:
posBody := -1
f := func(item pageparser.Item) bool {
if posBody == -1 && !item.IsDone() {
posBody = item.Pos
posBody = item.Pos()
}

if item.IsNonWhitespace() {
if item.IsNonWhitespace(result.Input()) {
p.truncated = true

// Done
Expand All @@ -712,7 +712,7 @@ Loop:
}
iter.PeekWalk(f)

p.source.posSummaryEnd = it.Pos
p.source.posSummaryEnd = it.Pos()
p.source.posBodyStart = posBody
p.source.hasSummaryDivider = true

Expand All @@ -727,13 +727,13 @@ Loop:
// let extractShortcode handle left delim (will do so recursively)
iter.Backup()

currShortcode, err := s.extractShortcode(ordinal, 0, iter)
currShortcode, err := s.extractShortcode(ordinal, 0, result.Input(), iter)
if err != nil {
return fail(err, it)
}

currShortcode.pos = it.Pos
currShortcode.length = iter.Current().Pos - it.Pos
currShortcode.pos = it.Pos()
currShortcode.length = iter.Current().Pos() - it.Pos()
if currShortcode.placeholder == "" {
currShortcode.placeholder = createShortcodePlaceholder("s", currShortcode.ordinal)
}
Expand All @@ -754,15 +754,15 @@ Loop:
rn.AddShortcode(currShortcode)

case it.Type == pageparser.TypeEmoji:
if emoji := helpers.Emoji(it.ValStr()); emoji != nil {
if emoji := helpers.Emoji(it.ValStr(result.Input())); emoji != nil {
rn.AddReplacement(emoji, it)
} else {
rn.AddBytes(it)
}
case it.IsEOF():
break Loop
case it.IsError():
err := fail(errors.New(it.ValStr()), it)
err := fail(errors.New(it.ValStr(result.Input())), it)
currShortcode.err = err
return err

Expand Down
2 changes: 1 addition & 1 deletion hugolib/page__content.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func (p pageContent) contentToRender(parsed pageparser.Result, pm *pageContentMa
for _, it := range pm.items {
switch v := it.(type) {
case pageparser.Item:
c = append(c, source[v.Pos:v.Pos+len(v.Val)]...)
c = append(c, source[v.Pos():v.Pos()+len(v.Val(source))]...)
case pageContentReplacement:
c = append(c, v.val...)
case *shortcode:
Expand Down
26 changes: 13 additions & 13 deletions hugolib/shortcode.go
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ func (s *shortcodeHandler) parseError(err error, input []byte, pos int) error {
// pageTokens state:
// - before: positioned just before the shortcode start
// - after: shortcode(s) consumed (plural when they are nested)
func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.Iterator) (*shortcode, error) {
func (s *shortcodeHandler) extractShortcode(ordinal, level int, source []byte, pt *pageparser.Iterator) (*shortcode, error) {
if s == nil {
panic("handler nil")
}
Expand All @@ -520,7 +520,7 @@ func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.I
pt.Backup()
item := pt.Next()
if item.IsIndentation() {
sc.indentation = string(item.Val)
sc.indentation = item.ValStr(source)
}
}

Expand All @@ -530,7 +530,7 @@ func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.I
const errorPrefix = "failed to extract shortcode"

fail := func(err error, i pageparser.Item) error {
return s.parseError(fmt.Errorf("%s: %w", errorPrefix, err), pt.Input(), i.Pos)
return s.parseError(fmt.Errorf("%s: %w", errorPrefix, err), source, i.Pos())
}

Loop:
Expand All @@ -550,7 +550,7 @@ Loop:
if cnt > 0 {
// nested shortcode; append it to inner content
pt.Backup()
nested, err := s.extractShortcode(nestedOrdinal, nextLevel, pt)
nested, err := s.extractShortcode(nestedOrdinal, nextLevel, source, pt)
nestedOrdinal++
if nested != nil && nested.name != "" {
s.addName(nested.name)
Expand Down Expand Up @@ -589,7 +589,7 @@ Loop:
// return that error, more specific
continue
}
return sc, fail(fmt.Errorf("shortcode %q has no .Inner, yet a closing tag was provided", next.Val), next)
return sc, fail(fmt.Errorf("shortcode %q has no .Inner, yet a closing tag was provided", next.ValStr(source)), next)
}
}
if next.IsRightShortcodeDelim() {
Expand All @@ -602,19 +602,19 @@ Loop:

return sc, nil
case currItem.IsText():
sc.inner = append(sc.inner, currItem.ValStr())
sc.inner = append(sc.inner, currItem.ValStr(source))
case currItem.Type == pageparser.TypeEmoji:
// TODO(bep) avoid the duplication of these "text cases", to prevent
// more of #6504 in the future.
val := currItem.ValStr()
val := currItem.ValStr(source)
if emoji := helpers.Emoji(val); emoji != nil {
sc.inner = append(sc.inner, string(emoji))
} else {
sc.inner = append(sc.inner, val)
}
case currItem.IsShortcodeName():

sc.name = currItem.ValStr()
sc.name = currItem.ValStr(source)

// Used to check if the template expects inner content.
templs := s.s.Tmpl().LookupVariants(sc.name)
Expand All @@ -625,7 +625,7 @@ Loop:
sc.info = templs[0].(tpl.Info)
sc.templs = templs
case currItem.IsInlineShortcodeName():
sc.name = currItem.ValStr()
sc.name = currItem.ValStr(source)
sc.isInline = true
case currItem.IsShortcodeParam():
if !pt.IsValueNext() {
Expand All @@ -634,11 +634,11 @@ Loop:
// named params
if sc.params == nil {
params := make(map[string]any)
params[currItem.ValStr()] = pt.Next().ValTyped()
params[currItem.ValStr(source)] = pt.Next().ValTyped(source)
sc.params = params
} else {
if params, ok := sc.params.(map[string]any); ok {
params[currItem.ValStr()] = pt.Next().ValTyped()
params[currItem.ValStr(source)] = pt.Next().ValTyped(source)
} else {
return sc, errShortCodeIllegalState
}
Expand All @@ -647,11 +647,11 @@ Loop:
// positional params
if sc.params == nil {
var params []any
params = append(params, currItem.ValTyped())
params = append(params, currItem.ValTyped(source))
sc.params = params
} else {
if params, ok := sc.params.([]any); ok {
params = append(params, currItem.ValTyped())
params = append(params, currItem.ValTyped(source))
sc.params = params
} else {
return sc, errShortCodeIllegalState
Expand Down
6 changes: 4 additions & 2 deletions hugolib/shortcode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ title: "Shortcodes Galore!"
handler := newShortcodeHandler(nil, s)
iter := p.Iterator()

short, err := handler.extractShortcode(0, 0, iter)
short, err := handler.extractShortcode(0, 0, p.Input(), iter)

test.check(c, short, err)
})
Expand Down Expand Up @@ -763,7 +763,7 @@ title: "Hugo Rocks!"
)
}

func TestShortcodeTypedParams(t *testing.T) {
func TestShortcodeParams(t *testing.T) {
t.Parallel()
c := qt.New(t)

Expand All @@ -778,6 +778,7 @@ title: "Hugo Rocks!"
types positional: {{< hello true false 33 3.14 >}}
types named: {{< hello b1=true b2=false i1=33 f1=3.14 >}}
types string: {{< hello "true" trues "33" "3.14" >}}
escaped quoute: {{< hello "hello \"world\"." >}}
`).WithTemplatesAdded(
Expand All @@ -796,6 +797,7 @@ Get: {{ printf "%v (%T)" $b1 $b1 | safeHTML }}
"types positional: - 0: true (bool) - 1: false (bool) - 2: 33 (int) - 3: 3.14 (float64)",
"types named: - b1: true (bool) - b2: false (bool) - f1: 3.14 (float64) - i1: 33 (int) Get: true (bool) ",
"types string: - 0: true (string) - 1: trues (string) - 2: 33 (string) - 3: 3.14 (string) ",
"hello &#34;world&#34;. (string)",
)
}

Expand Down
71 changes: 55 additions & 16 deletions parser/pageparser/item.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,59 @@ import (
"github.com/yuin/goldmark/util"
)

// lowHigh is a half-open byte range [Low, High) used to slice the source
// that an Item refers to.
type lowHigh struct {
	Low, High int
}

type Item struct {
Type ItemType
Pos int
Val []byte
Type ItemType
Err error

// The common case is a single segment.
low int
high int

// This is the uncommon case.
segments []lowHigh

// Used for validation.
firstByte byte

isString bool
}

type Items []Item

func (i Item) ValStr() string {
return string(i.Val)
// Pos returns the start offset of the item: the Low of its first segment
// when segments are set, otherwise the item's own low offset.
func (i Item) Pos() int {
	if len(i.segments) == 0 {
		return i.low
	}
	return i.segments[0].Low
}

// Val returns the bytes of source that this item covers.
//
// With no segments, or exactly one, the result is a subslice of source and
// nothing is copied. With several segments, their byte ranges are
// concatenated in order into a newly built buffer.
func (i Item) Val(source []byte) []byte {
	switch len(i.segments) {
	case 0:
		return source[i.low:i.high]
	case 1:
		only := i.segments[0]
		return source[only.Low:only.High]
	}

	var joined bytes.Buffer
	for _, seg := range i.segments {
		joined.Write(source[seg.Low:seg.High])
	}
	return joined.Bytes()
}

// ValStr returns the item's value, as resolved against source by Val,
// converted to a string.
func (i Item) ValStr(source []byte) string {
	val := i.Val(source)
	return string(val)
}

func (i Item) ValTyped() any {
str := i.ValStr()
func (i Item) ValTyped(source []byte) any {
str := i.ValStr(source)
if i.isString {
// A quoted value that is a string even if it looks like a number etc.
return str
Expand Down Expand Up @@ -73,8 +111,8 @@ func (i Item) IsIndentation() bool {
return i.Type == tIndentation
}

func (i Item) IsNonWhitespace() bool {
return len(bytes.TrimSpace(i.Val)) > 0
// IsNonWhitespace reports whether the item's value in source still has
// bytes left after bytes.TrimSpace has trimmed it.
func (i Item) IsNonWhitespace(source []byte) bool {
	trimmed := bytes.TrimSpace(i.Val(source))
	return len(trimmed) != 0
}

func (i Item) IsShortcodeName() bool {
Expand Down Expand Up @@ -125,20 +163,21 @@ func (i Item) IsError() bool {
return i.Type == tError
}

func (i Item) String() string {
func (i Item) ToString(source []byte) string {
val := i.Val(source)
switch {
case i.Type == tEOF:
return "EOF"
case i.Type == tError:
return string(i.Val)
return string(val)
case i.Type == tIndentation:
return fmt.Sprintf("%s:[%s]", i.Type, util.VisualizeSpaces(i.Val))
return fmt.Sprintf("%s:[%s]", i.Type, util.VisualizeSpaces(val))
case i.Type > tKeywordMarker:
return fmt.Sprintf("<%s>", i.Val)
case len(i.Val) > 50:
return fmt.Sprintf("%v:%.20q...", i.Type, i.Val)
return fmt.Sprintf("<%s>", val)
case len(val) > 50:
return fmt.Sprintf("%v:%.20q...", i.Type, val)
}
return fmt.Sprintf("%v:[%s]", i.Type, i.Val)
return fmt.Sprintf("%v:[%s]", i.Type, val)
}

type ItemType int
Expand Down
27 changes: 18 additions & 9 deletions parser/pageparser/item_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,22 @@ import (
func TestItemValTyped(t *testing.T) {
c := qt.New(t)

c.Assert(Item{Val: []byte("3.14")}.ValTyped(), qt.Equals, float64(3.14))
c.Assert(Item{Val: []byte(".14")}.ValTyped(), qt.Equals, float64(.14))
c.Assert(Item{Val: []byte("314")}.ValTyped(), qt.Equals, 314)
c.Assert(Item{Val: []byte("314x")}.ValTyped(), qt.Equals, "314x")
c.Assert(Item{Val: []byte("314 ")}.ValTyped(), qt.Equals, "314 ")
c.Assert(Item{Val: []byte("314"), isString: true}.ValTyped(), qt.Equals, "314")
c.Assert(Item{Val: []byte("true")}.ValTyped(), qt.Equals, true)
c.Assert(Item{Val: []byte("false")}.ValTyped(), qt.Equals, false)
c.Assert(Item{Val: []byte("trues")}.ValTyped(), qt.Equals, "trues")
source := []byte("3.14")
c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, float64(3.14))
source = []byte(".14")
c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, float64(0.14))
source = []byte("314")
c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, 314)
source = []byte("314")
c.Assert(Item{low: 0, high: len(source), isString: true}.ValTyped(source), qt.Equals, "314")
source = []byte("314x")
c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, "314x")
source = []byte("314 ")
c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, "314 ")
source = []byte("true")
c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, true)
source = []byte("false")
c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, false)
source = []byte("trued")

}
Loading

0 comments on commit 223bf28

Please sign in to comment.