Skip to content

Commit

Permalink
Merge pull request #64 from beclab/feat/beat-arm
Browse files Browse the repository at this point in the history
fix article title
  • Loading branch information
kaki-admin authored Oct 15, 2024
2 parents 91dd815 + facfa3d commit fbcccfd
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 7 deletions.
7 changes: 3 additions & 4 deletions backend-server/crawler/entry.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,11 @@ func EntryCrawler(entry *model.Entry, feedUrl, userAgent, cookie string, certifi
 )
 
 if entry.RawContent != "" {
+common.Logger.Info("crawler entry start to extract", zap.String("url", entry.URL))
+fullContent, pureContent, dateInArticle, imageUrlFromContent, title, templateAuthor, publishedAtTimestamp, mediaContent, mediaUrl, mediaType := processor.ArticleReadabilityExtractor(entry.RawContent, entry.URL, feedUrl, "", true)
 if strings.TrimSpace(entry.Title) == "" {
-entry.Title = extractTitleByHtml(entry.RawContent)
+entry.Title = title
 }
-common.Logger.Info("crawler entry start to extract", zap.String("url", entry.URL))
-fullContent, pureContent, dateInArticle, imageUrlFromContent, templateAuthor, publishedAtTimestamp, mediaContent, mediaUrl, mediaType := processor.ArticleReadabilityExtractor(entry.RawContent, entry.URL, feedUrl, "", true)
-
 entry.FullContent = fullContent
 entry.MediaContent = mediaContent
 entry.MediaUrl = mediaUrl
Expand Down
2 changes: 1 addition & 1 deletion backend-server/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ require (

 require (
 github.com/PuerkitoBio/goquery v1.9.1
-github.com/beclab/article-extractor v0.0.19
+github.com/beclab/article-extractor v0.0.21
 github.com/gorilla/mux v1.8.1
 github.com/lib/pq v1.10.9
 github.com/pemistahl/lingua-go v1.4.0
Expand Down
4 changes: 2 additions & 2 deletions backend-server/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ github.com/PuerkitoBio/goquery v1.9.1 h1:mTL6XjbJTZdpfL+Gwl5U2h1l9yEkJjhmlTeV9VP
github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY=
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/beclab/article-extractor v0.0.19 h1:zICF6WN0gzEuJzANic658Kt3+07mADLb7470+zL6Dtw=
github.com/beclab/article-extractor v0.0.19/go.mod h1:mGT6FvsLc9xzccWLO02XPvpNBcejj8ZTz40u7bkCBgI=
github.com/beclab/article-extractor v0.0.21 h1:NMphl6MJLCiFBDHmj94XPpd+HchAb/a43SQGD4Tb8Ns=
github.com/beclab/article-extractor v0.0.21/go.mod h1:mGT6FvsLc9xzccWLO02XPvpNBcejj8ZTz40u7bkCBgI=
github.com/beclab/fs-lib v0.0.2 h1:NGsbNSt3wLweJgFW3gOBz9UyLoe4L8VTNKxIJOTgN2Y=
github.com/beclab/fs-lib v0.0.2/go.mod h1:7MvnhYdMGnFiQBl96B1g5IGeZ0jM+p501ip2T+ZxR+I=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
Expand Down

0 comments on commit fbcccfd

Please sign in to comment.