Skip to content

Commit

Permalink
feat: add support for featured images
Browse files Browse the repository at this point in the history
```
The featured image information in WordPress is stored in the postmeta table. This table stores custom fields associated with posts, pages, and other post types.

The featured image is stored as a meta key named _thumbnail_id. The meta value is the ID of the attachment post that represents the featured image.
``` - via Google Gemini, which finally led me to solve the problem of exporting featured images
  • Loading branch information
ashishb committed Aug 27, 2024
1 parent 627f584 commit 2724ca5
Show file tree
Hide file tree
Showing 8 changed files with 177 additions and 81 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ $ make build_prod
1. [x] WordPress [footnotes](https://github.com/ashishb/wp2hugo/issues/24)
1. [x] WordPress page author
1. [x] Ability to filter by author(s), useful for [WordPress multi-site](https://www.smashingmagazine.com/2020/01/complete-guide-wordpress-multisite/) migrations
1. [ ] Featured images - I tried this [WordPress plugin](https://wordpress.org/plugins/export-media-with-selected-content/) but featured images are simply not exported
1. [x] Featured images - export featured image associations with pages and posts correctly
## Why existing tools don't work
Expand Down
119 changes: 65 additions & 54 deletions src/wp2hugo/internal/hugogenerator/hugo_gen_setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ func (g Generator) writePage(outputMediaDirPath string, pagePath string, page wp
return fmt.Errorf("error parsing page URL: %s", err)
}

p, err := g.NewHugoPage(pageURL, page)
p, err := g.newHugoPage(pageURL, page)
if err != nil {
return fmt.Errorf("error creating Hugo page: %s", err)
}
Expand All @@ -274,68 +274,79 @@ func (g Generator) writePage(outputMediaDirPath string, pagePath string, page wp
}
log.Info().Msgf("Page written: %s", pagePath)

if g.downloadMedia {
err := g.downloadPageMedia(outputMediaDirPath, p, pageURL)
if err != nil {
return err
}
}
return nil
}

// newHugoPage builds a hugopage.Page from the common fields of a parsed
// WordPress post or page. pageURL is the entry's original WordPress URL and is
// dereferenced, so it must not be nil. Any error from hugopage.NewPage is
// returned unchanged.
func (g Generator) newHugoPage(pageURL *url.URL, page wpparser.CommonFields) (*hugopage.Page, error) {
	// Both draft and pending entries are handed to NewPage as drafts.
	status := page.PublishStatus
	isDraft := status == wpparser.PublishStatusDraft || status == wpparser.PublishStatusPending

	return hugopage.NewPage(
		g.imageURLProvider,
		*pageURL,
		page.Author,
		page.Title,
		page.PublishDate,
		isDraft,
		page.Categories,
		page.Tags,
		page.Footnotes,
		page.Content,
		page.GUID,
		page.FeaturedImageID,
	)
}

func (g Generator) downloadPageMedia(outputMediaDirPath string, p *hugopage.Page, pageURL *url.URL) error {
links := p.WPImageLinks()
log.Debug().
Str("page", page.Title).
Str("page", pageURL.String()).
Int("links", len(links)).
Msgf("Embedded media links")

log.Debug().
Int("links", len(links)).
Msg("Downloading media files")

hostname := pageURL.Host
prefixes := make([]string, 0)
pageURL.Host = strings.TrimPrefix(pageURL.Host, "www.")
prefixes = append(prefixes, fmt.Sprintf("https://%s", pageURL.Host))
prefixes = append(prefixes, fmt.Sprintf("http://%s", pageURL.Host))
prefixes = append(prefixes, fmt.Sprintf("https://www.%s", pageURL.Host))
prefixes = append(prefixes, fmt.Sprintf("http://www.%s", pageURL.Host))
hostname = strings.TrimPrefix(hostname, "www.")
prefixes = append(prefixes, fmt.Sprintf("https://%s", hostname))
prefixes = append(prefixes, fmt.Sprintf("http://%s", hostname))
prefixes = append(prefixes, fmt.Sprintf("https://www.%s", hostname))
prefixes = append(prefixes, fmt.Sprintf("http://www.%s", hostname))

if g.downloadMedia {
log.Debug().
Int("links", len(links)).
Msg("Downloading media files")
for _, link := range links {
for _, prefix := range prefixes {
link = strings.TrimPrefix(link, prefix)
}
if !strings.HasPrefix(link, "/") {
log.Warn().
Str("link", link).
Str("source", page.Link).
Msg("non-relative link")
}
outputFilePath := fmt.Sprintf("%s/static/%s", outputMediaDirPath, strings.TrimSuffix(link, "/"))
if !strings.HasPrefix(link, "http") {
link = g.wpInfo.Link + link
}
media, err := g.mediaProvider.GetReader(link)
if err != nil {
if g.continueOnMediaDownloadFailure {
log.Error().
Err(err).
Str("mediaLink", link).
Str("pageLink", page.Link).
Msg("error fetching media file")
continue
}
return fmt.Errorf("error fetching media file %s: %s", link, err)
for _, link := range links {
for _, prefix := range prefixes {
link = strings.TrimPrefix(link, prefix)
}
if !strings.HasPrefix(link, "/") {
log.Warn().
Str("link", link).
Str("source", pageURL.String()).
Msg("non-relative link")
}
outputFilePath := fmt.Sprintf("%s/static/%s", outputMediaDirPath, strings.TrimSuffix(link, "/"))
if !strings.HasPrefix(link, "http") {
link = g.wpInfo.Link + link
}
media, err := g.mediaProvider.GetReader(link)
if err != nil {
if g.continueOnMediaDownloadFailure {
log.Error().
Err(err).
Str("mediaLink", link).
Str("pageLink", pageURL.String()).
Msg("error fetching media file")
continue
}
if err = download(outputFilePath, media); err != nil {
if g.continueOnMediaDownloadFailure {
log.Error().
Err(err).
Str("mediaLink", link).
Str("pageLink", page.Link).
Msg("error downloading media file")
continue
}
return fmt.Errorf("error downloading media file: %s embedded in %s", err, page.Link)
return fmt.Errorf("error fetching media file %s: %s", link, err)
}
if err = download(outputFilePath, media); err != nil {
if g.continueOnMediaDownloadFailure {
log.Error().
Err(err).
Str("mediaLink", link).
Str("pageLink", pageURL.String()).
Msg("error downloading media file")
continue
}
return fmt.Errorf("error downloading media file: %s embedded in %s", err, pageURL.String())
}
}
return nil
}

func (g Generator) NewHugoPage(pageURL *url.URL, page wpparser.CommonFields) (*hugopage.Page, error) {
return hugopage.NewPage(
g.imageURLProvider,
*pageURL, page.Author, page.Title, page.PublishDate,
page.PublishStatus == wpparser.PublishStatusDraft || page.PublishStatus == wpparser.PublishStatusPending,
page.Categories, page.Tags, page.Footnotes, page.Content, page.GUID)
}
2 changes: 1 addition & 1 deletion src/wp2hugo/internal/hugogenerator/hugo_generator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func TestFootnote(t *testing.T) {
generator := NewGenerator("/tmp", "", nil, false, false, *websiteInfo)
url1, err := url.Parse(post.GUID.Value)
assert.NoError(t, err)
hugoPage, err := generator.NewHugoPage(url1, post.CommonFields)
hugoPage, err := generator.newHugoPage(url1, post.CommonFields)
assert.NoError(t, err)

const expectedMarkdown = "Some text[^1] with a footnote\n\n[^1]: Here we are: the footnote."
Expand Down
71 changes: 60 additions & 11 deletions src/wp2hugo/internal/hugogenerator/hugopage/hugo_page.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@ const (
type Page struct {
// This is the original URL of the page from the WordPress site
absoluteURL url.URL

metadata map[string]any
markdown string
metadata map[string]any
markdown string
}

const _WordPressMoreTag = "<!--more-->"
Expand Down Expand Up @@ -62,12 +61,16 @@ var _hugoFigureLinks = regexp.MustCompile(`{{< figure.*?src="(.+?)".*? >}}`)
// {{< parallaxblur src="/wp-content/uploads/2018/12/bora%5Fbora%5F5%5Fresized.jpg" >}}
var _hugoParallaxBlurLinks = regexp.MustCompile(`{{< parallaxblur.*?src="(.+?)".*? >}}`)

func NewPage(provider ImageURLProvider, pageURL url.URL, author string, title string, publishDate *time.Time, isDraft bool,
categories []string, tags []string, footnotes []wpparser.Footnote,
htmlContent string, guid *rss.GUID) (*Page, error) {
func NewPage(provider ImageURLProvider, pageURL url.URL, author string, title string, publishDate *time.Time,
isDraft bool, categories []string, tags []string, footnotes []wpparser.Footnote,
htmlContent string, guid *rss.GUID, featuredImageID *string) (*Page, error) {
metadata, err := getMetadata(provider, pageURL, author, title, publishDate, isDraft, categories, tags, guid, featuredImageID)
if err != nil {
return nil, err
}
page := Page{
absoluteURL: pageURL,
metadata: getMetadata(pageURL, author, title, publishDate, isDraft, categories, tags, guid),
metadata: metadata,
}
// htmlContent is the HTML content of the page that will be
// transformed to Markdown
Expand Down Expand Up @@ -97,7 +100,12 @@ func (page *Page) WPImageLinks() []string {
arr1 := getMarkdownLinks(_markdownImageLinks, page.markdown)
arr2 := getMarkdownLinks(_hugoFigureLinks, page.markdown)
arr3 := getMarkdownLinks(_hugoParallaxBlurLinks, page.markdown)
return append(append(arr1, arr2...), arr3...)
coverImageURL := page.getCoverImageURL()
result := append(append(arr1, arr2...), arr3...)
if coverImageURL != nil {
result = append(result, *coverImageURL)
}
return result
}

func getMarkdownLinks(regex *regexp.Regexp, markdown string) []string {
Expand All @@ -109,8 +117,8 @@ func getMarkdownLinks(regex *regexp.Regexp, markdown string) []string {
return links
}

func getMetadata(pageURL url.URL, author string, title string, publishDate *time.Time, isDraft bool,
categories []string, tags []string, guid *rss.GUID) map[string]any {
func getMetadata(provider ImageURLProvider, pageURL url.URL, author string, title string, publishDate *time.Time,
isDraft bool, categories []string, tags []string, guid *rss.GUID, featuredImageID *string) (map[string]any, error) {
metadata := make(map[string]any)
metadata["url"] = pageURL.Path // Relative URL
metadata["author"] = author
Expand All @@ -132,7 +140,48 @@ func getMetadata(pageURL url.URL, author string, title string, publishDate *time
if guid != nil {
metadata["guid"] = guid.Value
}
return metadata
if featuredImageID != nil {
if imageInfo, err := provider.GetImageInfo(*featuredImageID); err != nil {
log.Fatal().
Err(err).
Str("imageID", *featuredImageID).
Msg("Image URL not found")
} else {
coverInfo := make(map[string]string)
imageURL, err := url.Parse(imageInfo.ImageURL)
if err != nil {
return nil, fmt.Errorf("error parsing image URL '%s': %s", imageInfo.ImageURL, err)
}
if imageURL.Host == pageURL.Host {
// If the image URL is on the same host as the page, we can use a relative URL
coverInfo["image"] = imageURL.Path
} else {
coverInfo["image"] = imageInfo.ImageURL
}
coverInfo["alt"] = imageInfo.Title
metadata["cover"] = coverInfo
}
}
return metadata, nil
}

// getCoverImageURL returns a pointer to the "image" entry of the page's
// "cover" metadata. It returns nil when the metadata map is nil, when there is
// no "cover" entry, when that entry is not a map[string]string, or when the
// map has no "image" key.
func (page *Page) getCoverImageURL() *string {
	// Reading a nil map or a missing key yields a nil interface value, which
	// fails the type assertion below, so no separate presence checks are needed.
	coverInfo, isStringMap := page.metadata["cover"].(map[string]string)
	if !isStringMap {
		return nil
	}
	if imageURL, found := coverInfo["image"]; found {
		return &imageURL
	}
	return nil
}

func (page *Page) writeMetadata(w io.Writer) error {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func TestManualLineBreaks(t *testing.T) {
func testMarkdownExtractor(t *testing.T, htmlInput string, markdownOutput string) {
url1, err := url.Parse("https://example.com")
assert.Nil(t, err)
page, err := NewPage(nil, *url1, "author", "Title", nil, false, nil, nil, nil, htmlInput, nil)
page, err := NewPage(nil, *url1, "author", "Title", nil, false, nil, nil, nil, htmlInput, nil, nil)
assert.Nil(t, err)
md, err := page.getMarkdown(nil, htmlInput, nil)
assert.Nil(t, err)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@ import (

type ImageURLProvider interface {
// E.g. converts "4256" to "https://ashishb.net/wp-content/uploads/2018/12/bora_bora_5_resized.jpg"
GetImageURL(imageID string) (*string, error)
GetImageInfo(imageID string) (*ImageInfo, error)
}

// ImageInfo describes an image resolved from its ID by an ImageURLProvider.
type ImageInfo struct {
// ImageURL is the URL of the image.
ImageURL string
// Title is the image's title; getMetadata uses it as the cover image's alt text.
Title string
}

// Example: [nk_awb awb_type="image" awb_image="4256" awb_stretch="true" awb_image_size="full" awb_image_background_size="cover" awb_image_background_position="50% 50%" awb_parallax="scroll-opacity" awb_parallax_speed="0.5" awb_parallax_mobile="true"]
Expand All @@ -30,15 +35,15 @@ func replaceAWBWithParallaxBlur(provider ImageURLProvider, htmlData string) stri

func awbReplacementFunction(provider ImageURLProvider, groups []string) string {
srcImageID := groups[1]
tmp, err := provider.GetImageURL(srcImageID)
tmp, err := provider.GetImageInfo(srcImageID)
if tmp == nil {
log.Fatal().
Err(err).
Str("imageID", srcImageID).
Msg("Image URL not found")
return ""
}
src := *tmp
src := tmp.ImageURL
// These characters create problems in Hugo's Markdown
src = strings.ReplaceAll(src, " ", "%20")
src = strings.ReplaceAll(src, "_", "%5F")
Expand Down
8 changes: 6 additions & 2 deletions src/wp2hugo/internal/hugogenerator/image_url_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package hugogenerator

import (
"fmt"
"github.com/ashishb/wp2hugo/src/wp2hugo/internal/hugogenerator/hugopage"
"github.com/ashishb/wp2hugo/src/wp2hugo/internal/wpparser"
"github.com/rs/zerolog/log"
)
Expand All @@ -10,15 +11,18 @@ type WordPressImageURLProvider struct {
info wpparser.WebsiteInfo
}

func (w WordPressImageURLProvider) GetImageURL(imageID string) (*string, error) {
func (w WordPressImageURLProvider) GetImageInfo(imageID string) (*hugopage.ImageInfo, error) {
log.Debug().Str("imageID", imageID).Msg("GetImageURL")
for _, attachment := range w.info.Attachments {
if attachment.PostID == imageID {
attachmentURL := attachment.GetAttachmentURL()
log.Info().
Str("imageID", imageID).
Str("Link", *attachmentURL).Msg("Image URL found")
return attachmentURL, nil
return &hugopage.ImageInfo{
ImageURL: *attachmentURL,
Title: attachment.Title,
}, nil
}
}
log.Error().Str("imageID", imageID).Msg("Image URL not found")
Expand Down
Loading

0 comments on commit 2724ca5

Please sign in to comment.