Skip to content

Commit

Permalink
Add an option to enable URL fallback
Browse files Browse the repository at this point in the history
  • Loading branch information
waybackarchiver committed Nov 7, 2021
1 parent fd699ca commit 3350fb6
Show file tree
Hide file tree
Showing 16 changed files with 112 additions and 21 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ You can also specify configuration options either via command flags or via envir
| - | `WAYBACK_MAX_MEDIA_SIZE` | `512MB` | Max size to limit download stream media |
| - | `WAYBACK_TIMEOUT` | `300` | Timeout for a single wayback request, defaults to 300 seconds |
| - | `WAYBACK_USERAGENT` | `WaybackArchiver/1.0` | User-Agent for a wayback request |
| - | `WAYBACK_FALLBACK` | `off` | Use Google cache as a fallback if the original webpage is unavailable |
| `-d`, `--daemon` | - | - | Run as daemon service, e.g. `telegram`, `web`, `mastodon`, `twitter`, `discord` |
| `--ia` | `WAYBACK_ENABLE_IA` | `true` | Wayback webpages to **Internet Archive** |
| `--is` | `WAYBACK_ENABLE_IS` | `true` | Wayback webpages to **Archive Today** |
Expand Down
48 changes: 48 additions & 0 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1283,3 +1283,51 @@ func TestWaybackUserAgent(t *testing.T) {
})
}
}

// TestWaybackFallback verifies that the WAYBACK_FALLBACK environment variable
// is parsed into Options.WaybackFallback, and that an empty or unrecognized
// value leaves the default in place.
//
// The test intentionally does not call t.Parallel: os.Clearenv and os.Setenv
// mutate the process-wide environment, so running concurrently with other
// tests that touch the environment would make the outcome order-dependent.
func TestWaybackFallback(t *testing.T) {
	tests := []struct {
		fallback string // raw value assigned to WAYBACK_FALLBACK
		expected bool   // value WaybackFallback must report
	}{
		{
			fallback: "",
			expected: defWaybackFallback,
		},
		{
			fallback: "unexpected",
			expected: defWaybackFallback,
		},
		{
			fallback: "on",
			expected: true,
		},
		{
			fallback: "true",
			expected: true,
		},
		{
			fallback: "0",
			expected: false,
		},
		{
			// "off" is the documented default spelling; it must never
			// enable the fallback.
			fallback: "off",
			expected: false,
		},
	}

	for i, test := range tests {
		t.Run(strconv.Itoa(i), func(t *testing.T) {
			// Start from an empty environment so only the variable under
			// test influences the parser.
			os.Clearenv()
			if err := os.Setenv("WAYBACK_FALLBACK", test.fallback); err != nil {
				t.Fatalf(`Setting WAYBACK_FALLBACK failed: %v`, err)
			}

			parser := NewParser()
			opts, err := parser.ParseEnvironmentVariables()
			if err != nil {
				t.Fatalf(`Parsing environment variables failed: %v`, err)
			}

			got := opts.WaybackFallback()
			if got != test.expected {
				t.Fatalf(`Unexpected set wayback fallback got %t instead of %t`, got, test.expected)
			}
		})
	}
}
9 changes: 9 additions & 0 deletions config/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ const (
defMaxMediaSize = "512MB"
defWaybackTimeout = 300
defWaybackUserAgent = "WaybackArchiver/1.0"
defWaybackFallback = false
)

var (
Expand Down Expand Up @@ -109,6 +110,7 @@ type Options struct {
maxMediaSize string
waybackTimeout int
waybackUserAgent string
waybackFallback bool
}

type ipfs struct {
Expand Down Expand Up @@ -196,6 +198,7 @@ func NewOptions() *Options {
maxMediaSize: defMaxMediaSize,
waybackTimeout: defWaybackTimeout,
waybackUserAgent: defWaybackUserAgent,
waybackFallback: defWaybackFallback,
ipfs: &ipfs{
host: defIPFSHost,
port: defIPFSPort,
Expand Down Expand Up @@ -638,3 +641,9 @@ func (o *Options) WaybackTimeout() time.Duration {
func (o *Options) WaybackUserAgent() string {
return o.waybackUserAgent
}

// WaybackFallback reports whether the fallback option is enabled, i.e.
// whether Google cache should be used when the original webpage is
// unavailable.
func (o *Options) WaybackFallback() bool {
	enabled := o.waybackFallback
	return enabled
}
2 changes: 2 additions & 0 deletions config/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ func (p *Parser) parseLines(lines []string) (err error) {
p.opts.waybackTimeout = parseInt(val, defWaybackTimeout)
case "WAYBACK_USERAGENT":
p.opts.waybackUserAgent = parseString(val, defWaybackUserAgent)
case "WAYBACK_FALLBACK":
p.opts.waybackFallback = parseBool(val, defWaybackFallback)
default:
if os.Getenv(key) == "" && val != "" {
os.Setenv(key, val)
Expand Down
6 changes: 3 additions & 3 deletions service/discord/discord.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"strings"

"github.com/fatih/color"
"github.com/wabarc/helper"
"github.com/wabarc/logger"
"github.com/wabarc/wayback"
"github.com/wabarc/wayback/config"
Expand All @@ -21,6 +20,7 @@ import (
"github.com/wabarc/wayback/pooling"
"github.com/wabarc/wayback/publish"
"github.com/wabarc/wayback/reduxer"
"github.com/wabarc/wayback/service"
"github.com/wabarc/wayback/storage"
"github.com/wabarc/wayback/template/render"

Expand Down Expand Up @@ -235,7 +235,7 @@ func (d *Discord) process(m *discord.MessageCreate) (err error) {
content := m.Content
logger.Debug("content: %s", content)

urls := helper.MatchURLFallback(content)
urls := service.MatchURL(content)

switch {
case m.GuildID != "" && !d.isMention(content):
Expand Down Expand Up @@ -318,7 +318,7 @@ func (d *Discord) playback(s *discord.Session, i *discord.InteractionCreate) err
metrics.IncrementPlayback(metrics.ServiceDiscord, metrics.StatusRequest)

text := i.ApplicationCommandData().Options[0].StringValue()
urls := helper.MatchURL(text)
urls := service.MatchURL(text)
if len(urls) == 0 {
return d.bot.InteractionRespond(i.Interaction, &discord.InteractionResponse{
Type: discord.InteractionResponseChannelMessageWithSource,
Expand Down
8 changes: 8 additions & 0 deletions service/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Copyright 2021 Wayback Archiver. All rights reserved.
// Use of this source code is governed by the GNU GPL v3
// license that can be found in the LICENSE file.

/*
Package service implements common utility functions for daemon services.
*/
package service // import "github.com/wabarc/wayback/service"
6 changes: 3 additions & 3 deletions service/httpd/httpd.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ import (

"github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/wabarc/helper"
"github.com/wabarc/logger"
"github.com/wabarc/wayback"
"github.com/wabarc/wayback/config"
"github.com/wabarc/wayback/metrics"
"github.com/wabarc/wayback/pooling"
"github.com/wabarc/wayback/publish"
"github.com/wabarc/wayback/reduxer"
"github.com/wabarc/wayback/service"
"github.com/wabarc/wayback/template"
"github.com/wabarc/wayback/version"
)
Expand Down Expand Up @@ -208,7 +208,7 @@ func (web *web) process(w http.ResponseWriter, r *http.Request) {
}
logger.Debug("text: %s", text)

urls := helper.MatchURLFallback(text)
urls := service.MatchURL(text)
if len(urls) == 0 {
logger.Warn("url no found.")
}
Expand Down Expand Up @@ -267,7 +267,7 @@ func (web *web) playback(w http.ResponseWriter, r *http.Request) {
}
logger.Debug("text: %s", text)

urls := helper.MatchURL(text)
urls := service.MatchURL(text)
if len(urls) == 0 {
logger.Warn("url no found.")
}
Expand Down
6 changes: 3 additions & 3 deletions service/mastodon/mastodon.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"time"

"github.com/mattn/go-mastodon"
"github.com/wabarc/helper"
"github.com/wabarc/logger"
"github.com/wabarc/wayback"
"github.com/wabarc/wayback/config"
Expand All @@ -21,6 +20,7 @@ import (
"github.com/wabarc/wayback/pooling"
"github.com/wabarc/wayback/publish"
"github.com/wabarc/wayback/reduxer"
"github.com/wabarc/wayback/service"
"github.com/wabarc/wayback/storage"
"github.com/wabarc/wayback/template/render"
"golang.org/x/net/html"
Expand Down Expand Up @@ -188,7 +188,7 @@ func (m *Mastodon) process(id mastodon.ID, status *mastodon.Status) (err error)
return m.playback(status)
}

urls := helper.MatchURLFallback(text)
urls := service.MatchURL(text)
pub := publish.NewMastodon(m.client)
if len(urls) == 0 {
logger.Warn("archives failure, URL no found.")
Expand All @@ -214,7 +214,7 @@ func (m *Mastodon) process(id mastodon.ID, status *mastodon.Status) (err error)

func (m *Mastodon) playback(status *mastodon.Status) error {
text := textContent(status.Content)
urls := helper.MatchURL(text)
urls := service.MatchURL(text)
if len(urls) == 0 {
logger.Warn("playback failure, URL no found.")
return errors.New("Mastodon: URL no found")
Expand Down
6 changes: 3 additions & 3 deletions service/matrix/matrix.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"strings"
"sync"

"github.com/wabarc/helper"
"github.com/wabarc/logger"
"github.com/wabarc/wayback"
"github.com/wabarc/wayback/config"
Expand All @@ -18,6 +17,7 @@ import (
"github.com/wabarc/wayback/pooling"
"github.com/wabarc/wayback/publish"
"github.com/wabarc/wayback/reduxer"
"github.com/wabarc/wayback/service"
"github.com/wabarc/wayback/storage"
"github.com/wabarc/wayback/template/render"
matrix "maunium.net/go/mautrix"
Expand Down Expand Up @@ -167,7 +167,7 @@ func (m *Matrix) process(ev *event.Event) error {
return m.playback(ev)
}

urls := helper.MatchURLFallback(text)
urls := service.MatchURL(text)
if len(urls) == 0 {
logger.Warn("archives failure, URL no found.")
// Redact message
Expand Down Expand Up @@ -213,7 +213,7 @@ func (m *Matrix) process(ev *event.Event) error {

func (m *Matrix) playback(ev *event.Event) error {
text := ev.Content.AsMessage().Body
urls := helper.MatchURL(text)
urls := service.MatchURL(text)
// Redact message
defer m.redact(ev, "URL no found. Original message: "+text)
if len(urls) == 0 {
Expand Down
4 changes: 2 additions & 2 deletions service/relaychat/relaychat.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"sync"

irc "github.com/thoj/go-ircevent"
"github.com/wabarc/helper"
"github.com/wabarc/logger"
"github.com/wabarc/wayback"
"github.com/wabarc/wayback/config"
Expand All @@ -19,6 +18,7 @@ import (
"github.com/wabarc/wayback/pooling"
"github.com/wabarc/wayback/publish"
"github.com/wabarc/wayback/reduxer"
"github.com/wabarc/wayback/service"
"github.com/wabarc/wayback/storage"
"github.com/wabarc/wayback/template/render"
)
Expand Down Expand Up @@ -125,7 +125,7 @@ func (i *IRC) process(ev *irc.Event) error {
text := ev.MessageWithoutFormat()
logger.Debug("from: %s message: %s", ev.Nick, text)

urls := helper.MatchURLFallback(text)
urls := service.MatchURL(text)
if len(urls) == 0 {
logger.Warn("archives failure, URL no found.")
return errors.New("IRC: URL no found")
Expand Down
6 changes: 3 additions & 3 deletions service/slack/slack.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"github.com/slack-go/slack"
"github.com/slack-go/slack/slackevents"
"github.com/slack-go/slack/socketmode"
"github.com/wabarc/helper"
"github.com/wabarc/logger"
"github.com/wabarc/wayback"
"github.com/wabarc/wayback/config"
Expand All @@ -20,6 +19,7 @@ import (
"github.com/wabarc/wayback/pooling"
"github.com/wabarc/wayback/publish"
"github.com/wabarc/wayback/reduxer"
"github.com/wabarc/wayback/service"
"github.com/wabarc/wayback/storage"
"github.com/wabarc/wayback/template/render"
)
Expand Down Expand Up @@ -256,7 +256,7 @@ func (s *Slack) process(ev *event) (err error) {
content := ev.Text
logger.Debug("content: %s", content)

urls := helper.MatchURLFallback(content)
urls := service.MatchURL(content)

metrics.IncrementWayback(metrics.ServiceSlack, metrics.StatusRequest)
if len(urls) == 0 {
Expand Down Expand Up @@ -320,7 +320,7 @@ func (s *Slack) playback(channel, text, triggerID string) error {
logger.Debug("channel %s, playback text %s, trigger id: %s", channel, text, triggerID)
metrics.IncrementPlayback(metrics.ServiceSlack, metrics.StatusRequest)

urls := helper.MatchURL(text)
urls := service.MatchURL(text)
if len(urls) == 0 {
// Only the inputs in input blocks will be included in view_submission’s view.state.values: https://slack.dev/java-slack-sdk/guides/modals
playbackNameText := slack.NewTextBlockObject(slack.PlainTextType, "URLs", false, false)
Expand Down
6 changes: 3 additions & 3 deletions service/telegram/telegram.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
"time"

"github.com/fatih/color"
"github.com/wabarc/helper"
"github.com/wabarc/logger"
"github.com/wabarc/wayback"
"github.com/wabarc/wayback/config"
Expand All @@ -24,6 +23,7 @@ import (
"github.com/wabarc/wayback/pooling"
"github.com/wabarc/wayback/publish"
"github.com/wabarc/wayback/reduxer"
"github.com/wabarc/wayback/service"
"github.com/wabarc/wayback/storage"
"github.com/wabarc/wayback/template/render"

Expand Down Expand Up @@ -183,7 +183,7 @@ func (t *Telegram) process(message *telegram.Message) (err error) {
if message.IsForwarded() && content == "" {
return nil
}
urls := helper.MatchURLFallback(content)
urls := service.MatchURL(content)

// Set command as playback if receive a playback command without URLs, and
// required user reply a message with URLs.
Expand Down Expand Up @@ -287,7 +287,7 @@ func (t *Telegram) playback(message *telegram.Message) error {
return err
}

urls := helper.MatchURL(message.Text)
urls := service.MatchURL(message.Text)
if len(urls) == 0 {
opts := &telegram.SendOptions{
ReplyTo: message,
Expand Down
3 changes: 2 additions & 1 deletion service/twitter/twitter.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/wabarc/wayback/pooling"
"github.com/wabarc/wayback/publish"
"github.com/wabarc/wayback/reduxer"
"github.com/wabarc/wayback/service"
"github.com/wabarc/wayback/storage"
"github.com/wabarc/wayback/template/render"
)
Expand Down Expand Up @@ -158,7 +159,7 @@ func (t *Twitter) process(event twitter.DirectMessageEvent) error {
t.Unlock()
}()

urls := helper.MatchURLFallback(text)
urls := service.MatchURL(text)
var realURLs []string
for _, url := range urls {
realURLs = append(realURLs, helper.RealURI(url))
Expand Down
18 changes: 18 additions & 0 deletions service/utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright 2021 Wayback Archiver. All rights reserved.
// Use of this source code is governed by the GNU GPL v3
// license that can be found in the LICENSE file.

package service // import "github.com/wabarc/wayback/service"

import (
"github.com/wabarc/helper"
"github.com/wabarc/wayback/config"
)

// MatchURL extracts the URLs found in s and returns them as a slice of
// strings. When the fallback option is enabled in config.Opts it delegates
// to helper.MatchURLFallback; otherwise it delegates to helper.MatchURL.
func MatchURL(s string) []string {
	if !config.Opts.WaybackFallback() {
		return helper.MatchURL(s)
	}
	return helper.MatchURLFallback(s)
}
3 changes: 3 additions & 0 deletions wayback.1
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ Timeout for single wayback request, default 300\&.
.B WAYBACK_USERAGENT
User-Agent for a wayback request, default WaybackArchiver/1.0\&.
.TP
.B WAYBACK_FALLBACK
Use Google cache as a fallback if the original webpage is unavailable, default "off"\&.
.TP
.B WAYBACK_BOLT_PATH
File path of bolt database, default ./wayback.db\&.
.TP
Expand Down
1 change: 1 addition & 0 deletions wayback.conf
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ WAYBACK_STORAGE_DIR=
WAYBACK_MAX_MEDIA_SIZE=512MB
WAYBACK_TIMEOUT=300
WAYBACK_USERAGENT=WaybackArchiver/1.0
WAYBACK_FALLBACK=off

# ipfs slot: infura, pinata
# doc: https://github.com/wabarc/ipfs-pinner#supported-pinning-services
Expand Down

0 comments on commit 3350fb6

Please sign in to comment.