From 223bf2800488ad5d38854bbb595d789bc35ebe32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Thu, 7 Jul 2022 16:11:47 +0200 Subject: [PATCH] parser/pageparser: Don't store the byte slices On its own this change doesn't do any magic, but this is part of a bigger picture about making Hugo leaner in the memory usage department. --- .github/workflows/test.yml | 1 + hugolib/page.go | 24 +-- hugolib/page__content.go | 2 +- hugolib/shortcode.go | 26 ++-- hugolib/shortcode_test.go | 6 +- parser/pageparser/item.go | 71 +++++++-- parser/pageparser/item_test.go | 27 ++-- parser/pageparser/pagelexer.go | 54 +++++-- parser/pageparser/pageparser.go | 79 +++++++--- parser/pageparser/pageparser_intro_test.go | 118 ++++++++++----- parser/pageparser/pageparser_main_test.go | 22 +-- .../pageparser/pageparser_shortcode_test.go | 140 +++++++++--------- parser/pageparser/pageparser_test.go | 13 ++ 13 files changed, 385 insertions(+), 198 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1403c4d572a..e1b78e1a51d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -50,6 +50,7 @@ jobs: - if: matrix.os == 'windows-latest' run: | choco install pandoc + choco install mingw --version 10.2.0 --allow-downgrade - run: pandoc -v - if: matrix.os == 'ubuntu-latest' name: Install dart-sass-embedded Linux diff --git a/hugolib/page.go b/hugolib/page.go index e37b47300c4..4752d11f163 100644 --- a/hugolib/page.go +++ b/hugolib/page.go @@ -639,7 +639,7 @@ func (p *pageState) mapContentForResult( if fe, ok := err.(herrors.FileError); ok { return fe } - return p.parseError(err, iter.Input(), i.Pos) + return p.parseError(err, result.Input(), i.Pos()) } // the parser is guaranteed to return items in proper order or fail, so … @@ -656,14 +656,14 @@ Loop: case it.Type == pageparser.TypeIgnore: case it.IsFrontMatter(): f := pageparser.FormatFromFrontMatterType(it.Type) - m, err := 
metadecoders.Default.UnmarshalToMap(it.Val, f) + m, err := metadecoders.Default.UnmarshalToMap(it.Val(result.Input()), f) if err != nil { if fe, ok := err.(herrors.FileError); ok { pos := fe.Position() // Apply the error to the content file. pos.Filename = p.File().Filename() // Offset the starting position of front matter. - offset := iter.LineNumber() - 1 + offset := iter.LineNumber(result.Input()) - 1 if f == metadecoders.YAML { offset -= 1 } @@ -687,7 +687,7 @@ Loop: next := iter.Peek() if !next.IsDone() { - p.source.posMainContent = next.Pos + p.source.posMainContent = next.Pos() } if !p.s.shouldBuild(p) { @@ -699,10 +699,10 @@ Loop: posBody := -1 f := func(item pageparser.Item) bool { if posBody == -1 && !item.IsDone() { - posBody = item.Pos + posBody = item.Pos() } - if item.IsNonWhitespace() { + if item.IsNonWhitespace(result.Input()) { p.truncated = true // Done @@ -712,7 +712,7 @@ Loop: } iter.PeekWalk(f) - p.source.posSummaryEnd = it.Pos + p.source.posSummaryEnd = it.Pos() p.source.posBodyStart = posBody p.source.hasSummaryDivider = true @@ -727,13 +727,13 @@ Loop: // let extractShortcode handle left delim (will do so recursively) iter.Backup() - currShortcode, err := s.extractShortcode(ordinal, 0, iter) + currShortcode, err := s.extractShortcode(ordinal, 0, result.Input(), iter) if err != nil { return fail(err, it) } - currShortcode.pos = it.Pos - currShortcode.length = iter.Current().Pos - it.Pos + currShortcode.pos = it.Pos() + currShortcode.length = iter.Current().Pos() - it.Pos() if currShortcode.placeholder == "" { currShortcode.placeholder = createShortcodePlaceholder("s", currShortcode.ordinal) } @@ -754,7 +754,7 @@ Loop: rn.AddShortcode(currShortcode) case it.Type == pageparser.TypeEmoji: - if emoji := helpers.Emoji(it.ValStr()); emoji != nil { + if emoji := helpers.Emoji(it.ValStr(result.Input())); emoji != nil { rn.AddReplacement(emoji, it) } else { rn.AddBytes(it) @@ -762,7 +762,7 @@ Loop: case it.IsEOF(): break Loop case it.IsError(): - err 
:= fail(errors.New(it.ValStr()), it) + err := fail(errors.New(it.ValStr(result.Input())), it) currShortcode.err = err return err diff --git a/hugolib/page__content.go b/hugolib/page__content.go index bf69fafcda4..a721d1fce97 100644 --- a/hugolib/page__content.go +++ b/hugolib/page__content.go @@ -45,7 +45,7 @@ func (p pageContent) contentToRender(parsed pageparser.Result, pm *pageContentMa for _, it := range pm.items { switch v := it.(type) { case pageparser.Item: - c = append(c, source[v.Pos:v.Pos+len(v.Val)]...) + c = append(c, source[v.Pos():v.Pos()+len(v.Val(source))]...) case pageContentReplacement: c = append(c, v.val...) case *shortcode: diff --git a/hugolib/shortcode.go b/hugolib/shortcode.go index 33767fc6811..1627acacb9c 100644 --- a/hugolib/shortcode.go +++ b/hugolib/shortcode.go @@ -509,7 +509,7 @@ func (s *shortcodeHandler) parseError(err error, input []byte, pos int) error { // pageTokens state: // - before: positioned just before the shortcode start // - after: shortcode(s) consumed (plural when they are nested) -func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.Iterator) (*shortcode, error) { +func (s *shortcodeHandler) extractShortcode(ordinal, level int, source []byte, pt *pageparser.Iterator) (*shortcode, error) { if s == nil { panic("handler nil") } @@ -520,7 +520,7 @@ func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.I pt.Backup() item := pt.Next() if item.IsIndentation() { - sc.indentation = string(item.Val) + sc.indentation = item.ValStr(source) } } @@ -530,7 +530,7 @@ func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.I const errorPrefix = "failed to extract shortcode" fail := func(err error, i pageparser.Item) error { - return s.parseError(fmt.Errorf("%s: %w", errorPrefix, err), pt.Input(), i.Pos) + return s.parseError(fmt.Errorf("%s: %w", errorPrefix, err), source, i.Pos()) } Loop: @@ -550,7 +550,7 @@ Loop: if cnt > 0 { // nested shortcode; append it to 
inner content pt.Backup() - nested, err := s.extractShortcode(nestedOrdinal, nextLevel, pt) + nested, err := s.extractShortcode(nestedOrdinal, nextLevel, source, pt) nestedOrdinal++ if nested != nil && nested.name != "" { s.addName(nested.name) @@ -589,7 +589,7 @@ Loop: // return that error, more specific continue } - return sc, fail(fmt.Errorf("shortcode %q has no .Inner, yet a closing tag was provided", next.Val), next) + return sc, fail(fmt.Errorf("shortcode %q has no .Inner, yet a closing tag was provided", next.ValStr(source)), next) } } if next.IsRightShortcodeDelim() { @@ -602,11 +602,11 @@ Loop: return sc, nil case currItem.IsText(): - sc.inner = append(sc.inner, currItem.ValStr()) + sc.inner = append(sc.inner, currItem.ValStr(source)) case currItem.Type == pageparser.TypeEmoji: // TODO(bep) avoid the duplication of these "text cases", to prevent // more of #6504 in the future. - val := currItem.ValStr() + val := currItem.ValStr(source) if emoji := helpers.Emoji(val); emoji != nil { sc.inner = append(sc.inner, string(emoji)) } else { @@ -614,7 +614,7 @@ Loop: } case currItem.IsShortcodeName(): - sc.name = currItem.ValStr() + sc.name = currItem.ValStr(source) // Used to check if the template expects inner content. 
templs := s.s.Tmpl().LookupVariants(sc.name) @@ -625,7 +625,7 @@ Loop: sc.info = templs[0].(tpl.Info) sc.templs = templs case currItem.IsInlineShortcodeName(): - sc.name = currItem.ValStr() + sc.name = currItem.ValStr(source) sc.isInline = true case currItem.IsShortcodeParam(): if !pt.IsValueNext() { @@ -634,11 +634,11 @@ Loop: // named params if sc.params == nil { params := make(map[string]any) - params[currItem.ValStr()] = pt.Next().ValTyped() + params[currItem.ValStr(source)] = pt.Next().ValTyped(source) sc.params = params } else { if params, ok := sc.params.(map[string]any); ok { - params[currItem.ValStr()] = pt.Next().ValTyped() + params[currItem.ValStr(source)] = pt.Next().ValTyped(source) } else { return sc, errShortCodeIllegalState } @@ -647,11 +647,11 @@ Loop: // positional params if sc.params == nil { var params []any - params = append(params, currItem.ValTyped()) + params = append(params, currItem.ValTyped(source)) sc.params = params } else { if params, ok := sc.params.([]any); ok { - params = append(params, currItem.ValTyped()) + params = append(params, currItem.ValTyped(source)) sc.params = params } else { return sc, errShortCodeIllegalState diff --git a/hugolib/shortcode_test.go b/hugolib/shortcode_test.go index 5b8a5c2959a..3f919096259 100644 --- a/hugolib/shortcode_test.go +++ b/hugolib/shortcode_test.go @@ -112,7 +112,7 @@ title: "Shortcodes Galore!" handler := newShortcodeHandler(nil, s) iter := p.Iterator() - short, err := handler.extractShortcode(0, 0, iter) + short, err := handler.extractShortcode(0, 0, p.Input(), iter) test.check(c, short, err) }) @@ -763,7 +763,7 @@ title: "Hugo Rocks!" ) } -func TestShortcodeTypedParams(t *testing.T) { +func TestShortcodeParams(t *testing.T) { t.Parallel() c := qt.New(t) @@ -778,6 +778,7 @@ title: "Hugo Rocks!" 
types positional: {{< hello true false 33 3.14 >}} types named: {{< hello b1=true b2=false i1=33 f1=3.14 >}} types string: {{< hello "true" trues "33" "3.14" >}} +escaped quoute: {{< hello "hello \"world\"." >}} `).WithTemplatesAdded( @@ -796,6 +797,7 @@ Get: {{ printf "%v (%T)" $b1 $b1 | safeHTML }} "types positional: - 0: true (bool) - 1: false (bool) - 2: 33 (int) - 3: 3.14 (float64)", "types named: - b1: true (bool) - b2: false (bool) - f1: 3.14 (float64) - i1: 33 (int) Get: true (bool) ", "types string: - 0: true (string) - 1: trues (string) - 2: 33 (string) - 3: 3.14 (string) ", + "hello "world". (string)", ) } diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go index 52546be4193..2083be70a94 100644 --- a/parser/pageparser/item.go +++ b/parser/pageparser/item.go @@ -22,21 +22,59 @@ import ( "github.com/yuin/goldmark/util" ) +type lowHigh struct { + Low int + High int +} + type Item struct { - Type ItemType - Pos int - Val []byte + Type ItemType + Err error + + // The common case is a single segment. + low int + high int + + // This is the uncommon case. + segments []lowHigh + + // Used for validation. + firstByte byte + isString bool } type Items []Item -func (i Item) ValStr() string { - return string(i.Val) +func (i Item) Pos() int { + if len(i.segments) > 0 { + return i.segments[0].Low + } + return i.low +} + +func (i Item) Val(source []byte) []byte { + if len(i.segments) == 0 { + return source[i.low:i.high] + } + + if len(i.segments) == 1 { + return source[i.segments[0].Low:i.segments[0].High] + } + + var b bytes.Buffer + for _, s := range i.segments { + b.Write(source[s.Low:s.High]) + } + return b.Bytes() +} + +func (i Item) ValStr(source []byte) string { + return string(i.Val(source)) } -func (i Item) ValTyped() any { - str := i.ValStr() +func (i Item) ValTyped(source []byte) any { + str := i.ValStr(source) if i.isString { // A quoted value that is a string even if it looks like a number etc. 
return str @@ -73,8 +111,8 @@ func (i Item) IsIndentation() bool { return i.Type == tIndentation } -func (i Item) IsNonWhitespace() bool { - return len(bytes.TrimSpace(i.Val)) > 0 +func (i Item) IsNonWhitespace(source []byte) bool { + return len(bytes.TrimSpace(i.Val(source))) > 0 } func (i Item) IsShortcodeName() bool { @@ -125,20 +163,21 @@ func (i Item) IsError() bool { return i.Type == tError } -func (i Item) String() string { +func (i Item) ToString(source []byte) string { + val := i.Val(source) switch { case i.Type == tEOF: return "EOF" case i.Type == tError: - return string(i.Val) + return string(val) case i.Type == tIndentation: - return fmt.Sprintf("%s:[%s]", i.Type, util.VisualizeSpaces(i.Val)) + return fmt.Sprintf("%s:[%s]", i.Type, util.VisualizeSpaces(val)) case i.Type > tKeywordMarker: - return fmt.Sprintf("<%s>", i.Val) - case len(i.Val) > 50: - return fmt.Sprintf("%v:%.20q...", i.Type, i.Val) + return fmt.Sprintf("<%s>", val) + case len(val) > 50: + return fmt.Sprintf("%v:%.20q...", i.Type, val) } - return fmt.Sprintf("%v:[%s]", i.Type, i.Val) + return fmt.Sprintf("%v:[%s]", i.Type, val) } type ItemType int diff --git a/parser/pageparser/item_test.go b/parser/pageparser/item_test.go index cd01202c6c4..db4cc127a82 100644 --- a/parser/pageparser/item_test.go +++ b/parser/pageparser/item_test.go @@ -22,13 +22,22 @@ import ( func TestItemValTyped(t *testing.T) { c := qt.New(t) - c.Assert(Item{Val: []byte("3.14")}.ValTyped(), qt.Equals, float64(3.14)) - c.Assert(Item{Val: []byte(".14")}.ValTyped(), qt.Equals, float64(.14)) - c.Assert(Item{Val: []byte("314")}.ValTyped(), qt.Equals, 314) - c.Assert(Item{Val: []byte("314x")}.ValTyped(), qt.Equals, "314x") - c.Assert(Item{Val: []byte("314 ")}.ValTyped(), qt.Equals, "314 ") - c.Assert(Item{Val: []byte("314"), isString: true}.ValTyped(), qt.Equals, "314") - c.Assert(Item{Val: []byte("true")}.ValTyped(), qt.Equals, true) - c.Assert(Item{Val: []byte("false")}.ValTyped(), qt.Equals, false) - c.Assert(Item{Val: 
[]byte("trues")}.ValTyped(), qt.Equals, "trues") + source := []byte("3.14") + c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, float64(3.14)) + source = []byte(".14") + c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, float64(0.14)) + source = []byte("314") + c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, 314) + source = []byte("314") + c.Assert(Item{low: 0, high: len(source), isString: true}.ValTyped(source), qt.Equals, "314") + source = []byte("314x") + c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, "314x") + source = []byte("314 ") + c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, "314 ") + source = []byte("true") + c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, true) + source = []byte("false") + c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, false) + source = []byte("trued") + } diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go index 770f26eb9dc..a7e6b6cd4e7 100644 --- a/parser/pageparser/pagelexer.go +++ b/parser/pageparser/pagelexer.go @@ -54,7 +54,7 @@ type pageLexer struct { // Implement the Result interface func (l *pageLexer) Iterator() *Iterator { - return l.newIterator() + return NewIterator(l.items) } func (l *pageLexer) Input() []byte { @@ -85,10 +85,6 @@ func newPageLexer(input []byte, stateStart stateFunc, cfg Config) *pageLexer { return lexer } -func (l *pageLexer) newIterator() *Iterator { - return &Iterator{l: l, lastPos: -1} -} - // main loop func (l *pageLexer) run() *pageLexer { for l.state = l.stateStart; l.state != nil; { @@ -136,6 +132,13 @@ func (l *pageLexer) backup() { l.pos -= l.width } +func (l *pageLexer) append(item Item) { + if item.Pos() < len(l.input) { + item.firstByte = l.input[item.Pos()] + } + l.items = append(l.items, item) +} + // sends an item back to the client. 
func (l *pageLexer) emit(t ItemType) { defer func() { @@ -151,11 +154,11 @@ func (l *pageLexer) emit(t ItemType) { break } if i == l.start && b != '\n' { - l.items = append(l.items, Item{tIndentation, l.start, l.input[l.start:l.pos], false}) + l.append(Item{Type: tIndentation, low: l.start, high: l.pos}) return } else if b == '\n' && i < l.pos-1 { - l.items = append(l.items, Item{t, l.start, l.input[l.start : i+1], false}) - l.items = append(l.items, Item{tIndentation, i + 1, l.input[i+1 : l.pos], false}) + l.append(Item{Type: t, low: l.start, high: i + 1}) + l.append(Item{Type: tIndentation, low: i + 1, high: l.pos}) return } else if b == '\n' && i == l.pos-1 { break @@ -164,13 +167,13 @@ func (l *pageLexer) emit(t ItemType) { } } - l.items = append(l.items, Item{t, l.start, l.input[l.start:l.pos], false}) + l.append(Item{Type: t, low: l.start, high: l.pos}) } // sends a string item back to the client. func (l *pageLexer) emitString(t ItemType) { - l.items = append(l.items, Item{t, l.start, l.input[l.start:l.pos], true}) + l.append(Item{Type: t, low: l.start, high: l.pos, isString: true}) l.start = l.pos } @@ -180,14 +183,33 @@ func (l *pageLexer) isEOF() bool { // special case, do not send '\\' back to client func (l *pageLexer) ignoreEscapesAndEmit(t ItemType, isString bool) { - val := bytes.Map(func(r rune) rune { + i := l.start + k := i + + var segments []lowHigh + + for i < l.pos { + r, w := utf8.DecodeRune(l.input[i:l.pos]) if r == '\\' { - return -1 + if i > k { + segments = append(segments, lowHigh{k, i}) + } + l.append(Item{Type: TypeIgnore, low: i, high: i + w}) + k = i + w } - return r - }, l.input[l.start:l.pos]) - l.items = append(l.items, Item{t, l.start, val, isString}) + i += w + } + + if k < l.pos { + segments = append(segments, lowHigh{k, l.pos}) + } + + if len(segments) > 0 { + l.append(Item{Type: t, segments: segments}) + } + l.start = l.pos + } // gets the current value (for debugging and error handling) @@ -204,7 +226,7 @@ var lf = 
[]byte("\n") // nil terminates the parser func (l *pageLexer) errorf(format string, args ...any) stateFunc { - l.items = append(l.items, Item{tError, l.start, []byte(fmt.Sprintf(format, args...)), true}) + l.append(Item{Type: tError, Err: fmt.Errorf(format, args...)}) return nil } diff --git a/parser/pageparser/pageparser.go b/parser/pageparser/pageparser.go index 67abefc3089..0a9fc61af41 100644 --- a/parser/pageparser/pageparser.go +++ b/parser/pageparser/pageparser.go @@ -15,6 +15,7 @@ package pageparser import ( "bytes" + "errors" "fmt" "io" "io/ioutil" @@ -33,9 +34,6 @@ type Result interface { var _ Result = (*pageLexer)(nil) // Parse parses the page in the given reader according to the given Config. -// TODO(bep) now that we have improved the "lazy order" init, it *may* be -// some potential saving in doing a buffered approach where the first pass does -// the frontmatter only. func Parse(r io.Reader, cfg Config) (Result, error) { return parseSection(r, cfg, lexIntroSection) } @@ -63,12 +61,12 @@ func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) { walkFn := func(item Item) bool { if frontMatterSource != nil { // The rest is content. - cf.Content = psr.Input()[item.Pos:] + cf.Content = psr.Input()[item.low:] // Done return false } else if item.IsFrontMatter() { cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type) - frontMatterSource = item.Val + frontMatterSource = item.Val(psr.Input()) } return true } @@ -113,10 +111,15 @@ func parseBytes(b []byte, cfg Config, start stateFunc) (Result, error) { return lexer, nil } +// NewIterator creates a new Iterator. +func NewIterator(items Items) *Iterator { + return &Iterator{items: items, lastPos: -1} +} + // An Iterator has methods to iterate a parsed page with support going back // if needed. 
type Iterator struct { - l *pageLexer + items Items lastPos int // position of the last item returned by nextItem } @@ -126,19 +129,14 @@ func (t *Iterator) Next() Item { return t.Current() } -// Input returns the input source. -func (t *Iterator) Input() []byte { - return t.l.Input() -} - -var errIndexOutOfBounds = Item{tError, 0, []byte("no more tokens"), true} +var errIndexOutOfBounds = Item{Type: tError, Err: errors.New("no more tokens")} // Current will repeatably return the current item. func (t *Iterator) Current() Item { - if t.lastPos >= len(t.l.items) { + if t.lastPos >= len(t.items) { return errIndexOutOfBounds } - return t.l.items[t.lastPos] + return t.items[t.lastPos] } // backs up one token. @@ -163,14 +161,14 @@ func (t *Iterator) IsValueNext() bool { // look at, but do not consume, the next item // repeated, sequential calls will return the same item func (t *Iterator) Peek() Item { - return t.l.items[t.lastPos+1] + return t.items[t.lastPos+1] } // PeekWalk will feed the next items in the iterator to walkFn // until it returns false. func (t *Iterator) PeekWalk(walkFn func(item Item) bool) { - for i := t.lastPos + 1; i < len(t.l.items); i++ { - item := t.l.items[i] + for i := t.lastPos + 1; i < len(t.items); i++ { + item := t.items[i] if !walkFn(item) { break } @@ -190,6 +188,49 @@ func (t *Iterator) Consume(cnt int) { } // LineNumber returns the current line number. Used for logging. -func (t *Iterator) LineNumber() int { - return bytes.Count(t.l.input[:t.Current().Pos], lf) + 1 +func (t *Iterator) LineNumber(source []byte) int { + return bytes.Count(source[:t.Current().low], lf) + 1 +} + +// IsProbablySourceOfItems returns true if the given source looks like original +// source of the items. +// There may be some false positives, but that is highly unlikely and good enough +// for the planned purpose. +// It will also return false if the last item is not EOF (error situations) or +// if items is empty, including when both source and items are empty. 
+func IsProbablySourceOfItems(source []byte, items Items) bool { + if len(source) == 0 && len(items) == 0 { + return false + } + if len(items) == 0 { + return false + } + + last := items[len(items)-1] + if last.Type != tEOF { + return false + } + + if last.Pos() != len(source) { + return false + } + + for _, item := range items { + if item.Type == tError { + return false + } + if item.Type == tEOF { + return true + } + + if item.Pos() >= len(source) { + return false + } + + if item.firstByte != source[item.Pos()] { + return false + } + } + + return true } diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go index 1b903d546f6..1b2d59ccca4 100644 --- a/parser/pageparser/pageparser_intro_test.go +++ b/parser/pageparser/pageparser_intro_test.go @@ -15,19 +15,25 @@ package pageparser import ( "fmt" - "reflect" "strings" "testing" + + qt "github.com/frankban/quicktest" ) type lexerTest struct { name string input string - items []Item + items []typeText +} + +type typeText struct { + typ ItemType + text string } -func nti(tp ItemType, val string) Item { - return Item{tp, 0, []byte(val), false} +func nti(tp ItemType, val string) typeText { + return typeText{typ: tp, text: val} } var ( @@ -52,48 +58,79 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$") // TODO(bep) a way to toggle ORG mode vs the rest. 
var frontMatterTests = []lexerTest{ - {"empty", "", []Item{tstEOF}}, - {"Byte order mark", "\ufeff\nSome text.\n", []Item{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}}, - {"HTML Document", ` `, []Item{nti(tError, "plain HTML documents not supported")}}, - {"HTML Document with shortcode", `{{< sc1 >}}`, []Item{nti(tError, "plain HTML documents not supported")}}, - {"No front matter", "\nSome text.\n", []Item{tstSomeText, tstEOF}}, - {"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}}, - {"YAML empty front matter", "---\n---\n\nSome text.\n", []Item{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}}, - {"YAML commented out front matter", "\nSome text.\n", []Item{nti(TypeIgnore, ""), tstSomeText, tstEOF}}, - {"YAML commented out front matter, no end", "\nSome text.\n", []typeText{nti(TypeIgnore, ""), tstSomeText, tstEOF}}, + {"YAML commented out front matter, no end", "\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstSummaryDivider, nti(tText, "Some text.\n"), tstEOF}}, - {"Summary divider same line", "+++\nfoo = \"bar\"\n+++\n\nSome text.Some text.\n", []Item{tstFrontMatterTOML, nti(tText, "\nSome text."), nti(TypeLeadSummaryDivider, ""), nti(tText, "Some text.\n"), tstEOF}}, + {"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []typeText{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}}, + {"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []typeText{tstFrontMatterTOML, tstSomeText, tstEOF}}, + {"JSON front matter", tstJSON + "\r\n\nSome text.\n", []typeText{tstFrontMatterJSON, tstSomeText, tstEOF}}, + {"ORG front matter", tstORG + "\nSome text.\n", []typeText{tstFrontMatterORG, tstSomeText, tstEOF}}, + {"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []typeText{tstFrontMatterORG, tstSomeText, nti(TypeLeadSummaryDivider, "# more\n"), nti(tText, "Some text.\n"), tstEOF}}, + {"Summary divider", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n\nSome 
text.\n", []typeText{tstFrontMatterTOML, tstSomeText, tstSummaryDivider, nti(tText, "Some text.\n"), tstEOF}}, + {"Summary divider same line", "+++\nfoo = \"bar\"\n+++\n\nSome text.Some text.\n", []typeText{tstFrontMatterTOML, nti(tText, "\nSome text."), nti(TypeLeadSummaryDivider, ""), nti(tText, "Some text.\n"), tstEOF}}, // https://github.com/gohugoio/hugo/issues/5402 - {"Summary and shortcode, no space", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n{{< sc1 >}}\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, nti(TypeLeadSummaryDivider, ""), tstLeftNoMD, tstSC1, tstRightNoMD, tstSomeText, tstEOF}}, + {"Summary and shortcode, no space", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n{{< sc1 >}}\nSome text.\n", []typeText{tstFrontMatterTOML, tstSomeText, nti(TypeLeadSummaryDivider, ""), tstLeftNoMD, tstSC1, tstRightNoMD, tstSomeText, tstEOF}}, // https://github.com/gohugoio/hugo/issues/5464 - {"Summary and shortcode only", "+++\nfoo = \"bar\"\n+++\n{{< sc1 >}}\n\n{{< sc2 >}}", []Item{tstFrontMatterTOML, tstLeftNoMD, tstSC1, tstRightNoMD, tstNewline, tstSummaryDivider, tstLeftNoMD, tstSC2, tstRightNoMD, tstEOF}}, + {"Summary and shortcode only", "+++\nfoo = \"bar\"\n+++\n{{< sc1 >}}\n\n{{< sc2 >}}", []typeText{tstFrontMatterTOML, tstLeftNoMD, tstSC1, tstRightNoMD, tstNewline, tstSummaryDivider, tstLeftNoMD, tstSC2, tstRightNoMD, tstEOF}}, } func TestFrontMatter(t *testing.T) { t.Parallel() + c := qt.New(t) for i, test := range frontMatterTests { items := collect([]byte(test.input), false, lexIntroSection) - if !equal(items, test.items) { - got := crLfReplacer.Replace(fmt.Sprint(items)) - expected := crLfReplacer.Replace(fmt.Sprint(test.items)) - t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, got, expected) + if !equal(test.input, items, test.items) { + got := itemsToString(items, []byte(test.input)) + expected := testItemsToString(test.items) + c.Assert(got, qt.Equals, expected, qt.Commentf("Test %d: %s", i, test.name)) + } + } +} + +func 
itemsToString(items []Item, source []byte) string { + var sb strings.Builder + for i, item := range items { + var s string + if item.Err != nil { + s = item.Err.Error() + } else { + s = string(item.Val(source)) + } + sb.WriteString(fmt.Sprintf("%s: %s\n", item.Type, s)) + + if i < len(items)-1 { + sb.WriteString("\n") } } + return crLfReplacer.Replace(sb.String()) +} + +func testItemsToString(items []typeText) string { + var sb strings.Builder + for i, item := range items { + sb.WriteString(fmt.Sprintf("%s: %s\n", item.typ, item.text)) + + if i < len(items)-1 { + sb.WriteString("\n") + } + } + return crLfReplacer.Replace(sb.String()) } func collectWithConfig(input []byte, skipFrontMatter bool, stateStart stateFunc, cfg Config) (items []Item) { l := newPageLexer(input, stateStart, cfg) l.run() - t := l.newIterator() + iter := NewIterator(l.items) for { - item := t.Next() + item := iter.Next() items = append(items, item) if item.Type == tEOF || item.Type == tError { break @@ -108,19 +145,34 @@ func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items [] return collectWithConfig(input, skipFrontMatter, stateStart, cfg) } +func collectStringMain(input string) []Item { + return collect([]byte(input), true, lexMainSection) +} + // no positional checking, for now ... 
-func equal(i1, i2 []Item) bool { - if len(i1) != len(i2) { +func equal(source string, got []Item, expect []typeText) bool { + if len(got) != len(expect) { return false } - for k := range i1 { - if i1[k].Type != i2[k].Type { + sourceb := []byte(source) + for k := range got { + g := got[k] + e := expect[k] + if g.Type != e.typ { return false } - if !reflect.DeepEqual(i1[k].Val, i2[k].Val) { + var s string + if g.Err != nil { + s = g.Err.Error() + } else { + s = string(g.Val(sourceb)) + } + + if s != e.text { return false } + } return true } diff --git a/parser/pageparser/pageparser_main_test.go b/parser/pageparser/pageparser_main_test.go index 8fed2bffab8..4e3fe8e84d1 100644 --- a/parser/pageparser/pageparser_main_test.go +++ b/parser/pageparser/pageparser_main_test.go @@ -14,27 +14,29 @@ package pageparser import ( - "fmt" "testing" + + qt "github.com/frankban/quicktest" ) func TestMain(t *testing.T) { t.Parallel() + c := qt.New(t) mainTests := []lexerTest{ - {"emoji #1", "Some text with :emoji:", []Item{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), tstEOF}}, - {"emoji #2", "Some text with :emoji: and some text.", []Item{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), nti(tText, " and some text."), tstEOF}}, - {"looks like an emoji #1", "Some text and then :emoji", []Item{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, "emoji"), tstEOF}}, - {"looks like an emoji #2", "Some text and then ::", []Item{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, ":"), tstEOF}}, - {"looks like an emoji #3", ":Some :text", []Item{nti(tText, ":"), nti(tText, "Some "), nti(tText, ":"), nti(tText, "text"), tstEOF}}, + {"emoji #1", "Some text with :emoji:", []typeText{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), tstEOF}}, + {"emoji #2", "Some text with :emoji: and some text.", []typeText{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), nti(tText, " and some text."), tstEOF}}, + {"looks like an emoji #1", "Some text 
and then :emoji", []typeText{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, "emoji"), tstEOF}}, + {"looks like an emoji #2", "Some text and then ::", []typeText{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, ":"), tstEOF}}, + {"looks like an emoji #3", ":Some :text", []typeText{nti(tText, ":"), nti(tText, "Some "), nti(tText, ":"), nti(tText, "text"), tstEOF}}, } for i, test := range mainTests { items := collectWithConfig([]byte(test.input), false, lexMainSection, Config{EnableEmoji: true}) - if !equal(items, test.items) { - got := crLfReplacer.Replace(fmt.Sprint(items)) - expected := crLfReplacer.Replace(fmt.Sprint(test.items)) - t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, got, expected) + if !equal(test.input, items, test.items) { + got := itemsToString(items, []byte(test.input)) + expected := testItemsToString(test.items) + c.Assert(got, qt.Equals, expected, qt.Commentf("Test %d: %s", i, test.name)) } } } diff --git a/parser/pageparser/pageparser_shortcode_test.go b/parser/pageparser/pageparser_shortcode_test.go index ce12975737d..a95d55ef362 100644 --- a/parser/pageparser/pageparser_shortcode_test.go +++ b/parser/pageparser/pageparser_shortcode_test.go @@ -15,6 +15,8 @@ package pageparser import ( "testing" + + qt "github.com/frankban/quicktest" ) var ( @@ -38,27 +40,28 @@ var ( tstParamFloat = nti(tScParam, "3.14") tstVal = nti(tScParamVal, "Hello World") tstText = nti(tText, "Hello World") + tstIgnoreEscape = nti(TypeIgnore, "\\") ) var shortCodeLexerTests = []lexerTest{ - {"empty", "", []Item{tstEOF}}, - {"spaces", " \t\n", []Item{nti(tText, " \t\n"), tstEOF}}, - {"text", `to be or not`, []Item{nti(tText, "to be or not"), tstEOF}}, - {"no markup", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, - {"with EOL", "{{< sc1 \n >}}", []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, + {"empty", "", []typeText{tstEOF}}, + {"spaces", " \t\n", []typeText{nti(tText, " \t\n"), tstEOF}}, + {"text", `to 
be or not`, []typeText{nti(tText, "to be or not"), tstEOF}}, + {"no markup", `{{< sc1 >}}`, []typeText{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, + {"with EOL", "{{< sc1 \n >}}", []typeText{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, - {"forward slash inside name", `{{< sc/sub >}}`, []Item{tstLeftNoMD, tstSCSlash, tstRightNoMD, tstEOF}}, + {"forward slash inside name", `{{< sc/sub >}}`, []typeText{tstLeftNoMD, tstSCSlash, tstRightNoMD, tstEOF}}, - {"simple with markup", `{{% sc1 %}}`, []Item{tstLeftMD, tstSC1, tstRightMD, tstEOF}}, - {"with spaces", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, - {"indented on new line", "Hello\n {{% sc1 %}}", []Item{nti(tText, "Hello\n"), nti(tIndentation, " "), tstLeftMD, tstSC1, tstRightMD, tstEOF}}, - {"indented on new line tab", "Hello\n\t{{% sc1 %}}", []Item{nti(tText, "Hello\n"), nti(tIndentation, "\t"), tstLeftMD, tstSC1, tstRightMD, tstEOF}}, - {"indented on first line", " {{% sc1 %}}", []Item{nti(tIndentation, " "), tstLeftMD, tstSC1, tstRightMD, tstEOF}}, - {"mismatched rightDelim", `{{< sc1 %}}`, []Item{ + {"simple with markup", `{{% sc1 %}}`, []typeText{tstLeftMD, tstSC1, tstRightMD, tstEOF}}, + {"with spaces", `{{< sc1 >}}`, []typeText{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, + {"indented on new line", "Hello\n {{% sc1 %}}", []typeText{nti(tText, "Hello\n"), nti(tIndentation, " "), tstLeftMD, tstSC1, tstRightMD, tstEOF}}, + {"indented on new line tab", "Hello\n\t{{% sc1 %}}", []typeText{nti(tText, "Hello\n"), nti(tIndentation, "\t"), tstLeftMD, tstSC1, tstRightMD, tstEOF}}, + {"indented on first line", " {{% sc1 %}}", []typeText{nti(tIndentation, " "), tstLeftMD, tstSC1, tstRightMD, tstEOF}}, + {"mismatched rightDelim", `{{< sc1 %}}`, []typeText{ tstLeftNoMD, tstSC1, nti(tError, "unrecognized character in shortcode action: U+0025 '%'. 
Note: Parameters with non-alphanumeric args must be quoted"), }}, - {"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []Item{ + {"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []typeText{ tstLeftMD, tstSC1, tstRightMD, @@ -69,79 +72,79 @@ var shortCodeLexerTests = []lexerTest{ tstRightMD, tstEOF, }}, - {"close, but no open", `{{< /sc1 >}}`, []Item{ + {"close, but no open", `{{< /sc1 >}}`, []typeText{ tstLeftNoMD, nti(tError, "got closing shortcode, but none is open"), }}, - {"close wrong", `{{< sc1 >}}{{< /another >}}`, []Item{ + {"close wrong", `{{< sc1 >}}{{< /another >}}`, []typeText{ tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, nti(tError, "closing tag for shortcode 'another' does not match start tag"), }}, - {"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []Item{ + {"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []typeText{ tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, nti(tError, "closing tag for shortcode 'another' does not match start tag"), }}, - {"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []Item{ + {"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []typeText{ tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, nti(tError, "unclosed shortcode"), }}, - {"float param, positional", `{{< sc1 3.14 >}}`, []Item{ + {"float param, positional", `{{< sc1 3.14 >}}`, []typeText{ tstLeftNoMD, tstSC1, nti(tScParam, "3.14"), tstRightNoMD, tstEOF, }}, - {"float param, named", `{{< sc1 param1=3.14 >}}`, []Item{ + {"float param, named", `{{< sc1 param1=3.14 >}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, nti(tScParamVal, "3.14"), tstRightNoMD, tstEOF, }}, - {"named param, raw string", `{{< sc1 param1=` + "`" + "Hello World" + "`" + " >}}", []Item{ + {"named param, raw string", `{{< sc1 param1=` + "`" + "Hello World" + "`" + " >}}", []typeText{ tstLeftNoMD, tstSC1, tstParam1, nti(tScParamVal, 
"Hello World"), tstRightNoMD, tstEOF, }}, - {"float param, named, space before", `{{< sc1 param1= 3.14 >}}`, []Item{ + {"float param, named, space before", `{{< sc1 param1= 3.14 >}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, nti(tScParamVal, "3.14"), tstRightNoMD, tstEOF, }}, - {"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []Item{ + {"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []typeText{ tstLeftNoMD, tstSC1, nti(tScParam, "-ziL-Q_456igdO-4"), tstRightNoMD, tstEOF, }}, - {"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []Item{ + {"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []typeText{ tstLeftNoMD, tstSC1, nti(tScParam, "-ziL-.%QigdO-4"), tstRightNoMD, tstEOF, }}, - {"raw string", `{{< sc1` + "`" + "Hello World" + "`" + ` >}}`, []Item{ + {"raw string", `{{< sc1` + "`" + "Hello World" + "`" + ` >}}`, []typeText{ tstLeftNoMD, tstSC1, nti(tScParam, "Hello World"), tstRightNoMD, tstEOF, }}, {"raw string with newline", `{{< sc1` + "`" + `Hello - World` + "`" + ` >}}`, []Item{ + World` + "`" + ` >}}`, []typeText{ tstLeftNoMD, tstSC1, nti(tScParam, `Hello World`), tstRightNoMD, tstEOF, }}, - {"raw string with escape character", `{{< sc1` + "`" + `Hello \b World` + "`" + ` >}}`, []Item{ + {"raw string with escape character", `{{< sc1` + "`" + `Hello \b World` + "`" + ` >}}`, []typeText{ tstLeftNoMD, tstSC1, nti(tScParam, `Hello \b World`), tstRightNoMD, tstEOF, }}, - {"two params", `{{< sc1 param1 param2 >}}`, []Item{ + {"two params", `{{< sc1 param1 param2 >}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, tstParam2, tstRightNoMD, tstEOF, }}, // issue #934 - {"self-closing", `{{< sc1 />}}`, []Item{ + {"self-closing", `{{< sc1 />}}`, []typeText{ tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF, }}, // Issue 2498 - {"multiple self-closing", `{{< sc1 />}}{{< sc1 />}}`, []Item{ + {"multiple self-closing", `{{< sc1 />}}{{< sc1 />}}`, []typeText{ tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstLeftNoMD, tstSC1, tstSCClose, 
tstRightNoMD, tstEOF, }}, - {"self-closing with param", `{{< sc1 param1 />}}`, []Item{ + {"self-closing with param", `{{< sc1 param1 />}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF, }}, - {"multiple self-closing with param", `{{< sc1 param1 />}}{{< sc1 param1 />}}`, []Item{ + {"multiple self-closing with param", `{{< sc1 param1 />}}{{< sc1 param1 />}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF, }}, - {"multiple different self-closing with param", `{{< sc1 param1 />}}{{< sc2 param1 />}}`, []Item{ + {"multiple different self-closing with param", `{{< sc1 param1 />}}{{< sc2 param1 />}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstLeftNoMD, tstSC2, tstParam1, tstSCClose, tstRightNoMD, tstEOF, }}, - {"nested simple", `{{< sc1 >}}{{< sc2 >}}{{< /sc1 >}}`, []Item{ + {"nested simple", `{{< sc1 >}}{{< sc2 >}}{{< /sc1 >}}`, []typeText{ tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSC2, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstEOF, }}, - {"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []Item{ + {"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []typeText{ tstLeftNoMD, tstSC1, tstRightNoMD, nti(tText, "ab"), tstLeftMD, tstSC2, tstParam1, tstRightMD, @@ -156,106 +159,109 @@ var shortCodeLexerTests = []lexerTest{ nti(tText, "kl"), tstEOF, }}, - {"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []Item{ + {"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []typeText{ tstLeftNoMD, tstSC1, nti(tScParam, "param nr. 1"), nti(tScParam, "param nr. 
2"), tstRightNoMD, tstEOF, }}, - {"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []Item{ + {"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, nti(tScParamVal, "p2Val"), tstRightNoMD, tstEOF, }}, - {"escaped quotes", `{{< sc1 param1=\"Hello World\" >}}`, []Item{ + {"escaped quotes", `{{< sc1 param1=\"Hello World\" >}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, tstVal, tstRightNoMD, tstEOF, }}, - {"escaped quotes, positional param", `{{< sc1 \"param1\" >}}`, []Item{ + {"escaped quotes, positional param", `{{< sc1 \"param1\" >}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, tstRightNoMD, tstEOF, }}, - {"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\" >}}`, []Item{ + {"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\" >}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, nti(tScParamVal, `Hello `), nti(tError, `got positional parameter 'escaped'. 
Cannot mix named and positional parameters`), }}, { "escaped quotes inside nonescaped quotes", `{{< sc1 param1="Hello \"escaped\" World" >}}`, - []Item{ - tstLeftNoMD, tstSC1, tstParam1, nti(tScParamVal, `Hello "escaped" World`), tstRightNoMD, tstEOF, + []typeText{ + tstLeftNoMD, tstSC1, tstParam1, tstIgnoreEscape, tstIgnoreEscape, nti(tScParamVal, `Hello "escaped" World`), tstRightNoMD, tstEOF, }, }, { "escaped quotes inside nonescaped quotes in positional param", `{{< sc1 "Hello \"escaped\" World" >}}`, - []Item{ - tstLeftNoMD, tstSC1, nti(tScParam, `Hello "escaped" World`), tstRightNoMD, tstEOF, + []typeText{ + tstLeftNoMD, tstSC1, tstIgnoreEscape, tstIgnoreEscape, nti(tScParam, `Hello "escaped" World`), tstRightNoMD, tstEOF, }, }, - {"escaped raw string, named param", `{{< sc1 param1=` + `\` + "`" + "Hello World" + `\` + "`" + ` >}}`, []Item{ + {"escaped raw string, named param", `{{< sc1 param1=` + `\` + "`" + "Hello World" + `\` + "`" + ` >}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, nti(tError, "unrecognized escape character"), }}, - {"escaped raw string, positional param", `{{< sc1 param1 ` + `\` + "`" + "Hello World" + `\` + "`" + ` >}}`, []Item{ + {"escaped raw string, positional param", `{{< sc1 param1 ` + `\` + "`" + "Hello World" + `\` + "`" + ` >}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, nti(tError, "unrecognized escape character"), }}, - {"two raw string params", `{{< sc1` + "`" + "Hello World" + "`" + "`" + "Second Param" + "`" + ` >}}`, []Item{ + {"two raw string params", `{{< sc1` + "`" + "Hello World" + "`" + "`" + "Second Param" + "`" + ` >}}`, []typeText{ tstLeftNoMD, tstSC1, nti(tScParam, "Hello World"), nti(tScParam, "Second Param"), tstRightNoMD, tstEOF, }}, - {"unterminated quote", `{{< sc1 param2="Hello World>}}`, []Item{ + {"unterminated quote", `{{< sc1 param2="Hello World>}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam2, nti(tError, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'"), }}, - 
{"unterminated raw string", `{{< sc1` + "`" + "Hello World" + ` >}}`, []Item{ + {"unterminated raw string", `{{< sc1` + "`" + "Hello World" + ` >}}`, []typeText{ tstLeftNoMD, tstSC1, nti(tError, "unterminated raw string in shortcode parameter-argument: 'Hello World >}}'"), }}, - {"unterminated raw string in second argument", `{{< sc1` + "`" + "Hello World" + "`" + "`" + "Second Param" + ` >}}`, []Item{ + {"unterminated raw string in second argument", `{{< sc1` + "`" + "Hello World" + "`" + "`" + "Second Param" + ` >}}`, []typeText{ tstLeftNoMD, tstSC1, nti(tScParam, "Hello World"), nti(tError, "unterminated raw string in shortcode parameter-argument: 'Second Param >}}'"), }}, - {"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []Item{ + {"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, tstVal, nti(tError, "got positional parameter 'p2'. Cannot mix named and positional parameters"), }}, - {"one named param, one quoted positional param, both raw strings", `{{< sc1 param1=` + "`" + "Hello World" + "`" + "`" + "Second Param" + "`" + ` >}}`, []Item{ + {"one named param, one quoted positional param, both raw strings", `{{< sc1 param1=` + "`" + "Hello World" + "`" + "`" + "Second Param" + "`" + ` >}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, tstVal, nti(tError, "got quoted positional parameter. Cannot mix named and positional parameters"), }}, - {"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []Item{ + {"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, tstVal, nti(tError, "got quoted positional parameter. 
Cannot mix named and positional parameters"), }}, - {"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []Item{ + {"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, nti(tError, "got named parameter 'param2'. Cannot mix named and positional parameters"), }}, - {"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []Item{ + {"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []typeText{ tstLeftNoMD, tstSC1, tstParam1, nti(tError, "got named parameter 'param2'. Cannot mix named and positional parameters"), }}, - {"commented out", `{{</* sc1 */>}}`, []Item{ + {"commented out", `{{</* sc1 */>}}`, []typeText{ nti(tText, "{{<"), nti(tText, " sc1 "), nti(tText, ">}}"), tstEOF, }}, - {"commented out, with asterisk inside", `{{</* sc1 "**/*.pdf" */>}}`, []Item{ + {"commented out, with asterisk inside", `{{</* sc1 "**/*.pdf" */>}}`, []typeText{ nti(tText, "{{<"), nti(tText, " sc1 \"**/*.pdf\" "), nti(tText, ">}}"), tstEOF, }}, - {"commented out, missing close", `{{</* sc1 >}}`, []Item{ + {"commented out, missing close", `{{</* sc1 >}}`, []typeText{ nti(tError, "comment must be closed"), }}, - {"commented out, misplaced close", `{{</* sc1 >}}*/`, []Item{ + {"commented out, misplaced close", `{{</* sc1 >}}*/`, []typeText{ nti(tError, "comment must be closed"), }}, // Inline shortcodes - {"basic inline", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}}, - {"basic inline with space", `{{< sc1.inline >}}Hello World{{< / sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}}, - {"inline self closing", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}Hello World{{< sc1.inline />}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstText,
tstLeftNoMD, tstSC1Inline, tstSCClose, tstRightNoMD, tstEOF}}, - {"inline self closing, then a new inline", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}Hello World{{< sc1.inline />}}{{< sc2.inline >}}Hello World{{< /sc2.inline >}}`, []Item{ + {"basic inline", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}`, []typeText{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}}, + {"basic inline with space", `{{< sc1.inline >}}Hello World{{< / sc1.inline >}}`, []typeText{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}}, + {"inline self closing", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}Hello World{{< sc1.inline />}}`, []typeText{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSC1Inline, tstSCClose, tstRightNoMD, tstEOF}}, + {"inline self closing, then a new inline", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}Hello World{{< sc1.inline />}}{{< sc2.inline >}}Hello World{{< /sc2.inline >}}`, []typeText{ tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSC1Inline, tstSCClose, tstRightNoMD, tstLeftNoMD, tstSC2Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC2Inline, tstRightNoMD, tstEOF, }}, - {"inline with template syntax", `{{< sc1.inline >}}{{ .Get 0 }}{{ .Get 1 }}{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, nti(tText, "{{ .Get 0 }}"), nti(tText, "{{ .Get 1 }}"), tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}}, - {"inline with nested shortcode (not supported)", `{{< sc1.inline >}}Hello World{{< sc1 >}}{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, nti(tError, "inline shortcodes do not support nesting")}}, - {"inline case mismatch", `{{< sc1.Inline >}}Hello World{{< /sc1.Inline >}}`, 
[]Item{tstLeftNoMD, nti(tError, "period in shortcode name only allowed for inline identifiers")}}, + {"inline with template syntax", `{{< sc1.inline >}}{{ .Get 0 }}{{ .Get 1 }}{{< /sc1.inline >}}`, []typeText{tstLeftNoMD, tstSC1Inline, tstRightNoMD, nti(tText, "{{ .Get 0 }}"), nti(tText, "{{ .Get 1 }}"), tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}}, + {"inline with nested shortcode (not supported)", `{{< sc1.inline >}}Hello World{{< sc1 >}}{{< /sc1.inline >}}`, []typeText{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, nti(tError, "inline shortcodes do not support nesting")}}, + {"inline case mismatch", `{{< sc1.Inline >}}Hello World{{< /sc1.Inline >}}`, []typeText{tstLeftNoMD, nti(tError, "period in shortcode name only allowed for inline identifiers")}}, } func TestShortcodeLexer(t *testing.T) { t.Parallel() + c := qt.New(t) for i, test := range shortCodeLexerTests { t.Run(test.name, func(t *testing.T) { items := collect([]byte(test.input), true, lexMainSection) - if !equal(items, test.items) { - t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items) + if !equal(test.input, items, test.items) { + got := itemsToString(items, []byte(test.input)) + expected := testItemsToString(test.items) + c.Assert(got, qt.Equals, expected, qt.Commentf("Test %d: %s", i, test.name)) } }) } diff --git a/parser/pageparser/pageparser_test.go b/parser/pageparser/pageparser_test.go index f7f719938a2..a21b97970cf 100644 --- a/parser/pageparser/pageparser_test.go +++ b/parser/pageparser/pageparser_test.go @@ -14,6 +14,7 @@ package pageparser import ( + "bytes" "strings" "testing" @@ -88,3 +89,15 @@ func TestFormatFromFrontMatterType(t *testing.T) { c.Assert(FormatFromFrontMatterType(test.typ), qt.Equals, test.expect) } } + +func TestIsProbablyItemsSource(t *testing.T) { + c := qt.New(t) + + input := ` {{< foo >}} ` + items := collectStringMain(input) + + c.Assert(IsProbablySourceOfItems([]byte(input), items), qt.IsTrue) + 
c.Assert(IsProbablySourceOfItems(bytes.Repeat([]byte(" "), len(input)), items), qt.IsFalse) + c.Assert(IsProbablySourceOfItems([]byte(`{{< foo >}} `), items), qt.IsFalse) + c.Assert(IsProbablySourceOfItems([]byte(``), items), qt.IsFalse) +}