Skip to content

Commit

Permalink
Rename client identifier type, add some docs (#50)
Browse files Browse the repository at this point in the history
  • Loading branch information
efixler authored Aug 8, 2024
1 parent 0311158 commit 4eb322b
Show file tree
Hide file tree
Showing 14 changed files with 63 additions and 59 deletions.
4 changes: 2 additions & 2 deletions fetch/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ const (

type Client interface {
Get(url string, headers http.Header) (*http.Response, error)
Identifier() resource.FetchClient
Identifier() resource.ClientIdentifier
}

// ClientOption is a function that receives a *defaultClient and may return an error.
// NOTE(review): presumably applied as a functional option while constructing a client — confirm against the constructor.
type ClientOption func(*defaultClient) error
Expand Down Expand Up @@ -49,7 +49,7 @@ type defaultClient struct {
httpClient *http.Client
}

func (c defaultClient) Identifier() resource.FetchClient {
func (c defaultClient) Identifier() resource.ClientIdentifier {
return resource.DefaultClient
}

Expand Down
2 changes: 1 addition & 1 deletion internal/headless/fetch_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func NewChromeClient(ctx context.Context, userAgent string, maxConcurrent int) (
return c, nil
}

func (c client) Identifier() resource.FetchClient {
func (c client) Identifier() resource.ClientIdentifier {
return resource.HeadlessChromium
}

Expand Down
4 changes: 2 additions & 2 deletions internal/server/api/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (
)

type mockUrlFetcher struct {
fetchMethod resource.FetchClient
fetchMethod resource.ClientIdentifier
}

func (m *mockUrlFetcher) Fetch(url *nurl.URL) (*resource.WebPage, error) {
Expand Down Expand Up @@ -175,7 +175,7 @@ func TestSingleHandler(t *testing.T) {
name string
url string
handler http.HandlerFunc
expectMethod resource.FetchClient
expectMethod resource.ClientIdentifier
}{
{
name: "client",
Expand Down
2 changes: 1 addition & 1 deletion internal/server/version/version.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package version

const (
Commit = "1a12247"
Commit = "0311158"
Tag = "v0.8.6"
RepoURL = "https://github.com/efixler/scrape"
)
2 changes: 1 addition & 1 deletion internal/settings/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func populateTestDB(dbh *database.DBHandle, count int) ([]string, error) {
ds := &DomainSettings{
Domain: d,
Sitename: randomString(32),
FetchClient: resource.FetchClient(rand.Intn(3)),
FetchClient: resource.ClientIdentifier(rand.Intn(3)),
UserAgent: ua.UserAgent(randomString(64)),
Headers: map[string]string{
"x-token": randomString(rand.Intn(128) + 127),
Expand Down
10 changes: 5 additions & 5 deletions internal/settings/domain.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ var (
)

type DomainSettings struct {
Domain string `json:"domain,omitempty"`
Sitename string `json:"sitename,omitempty"`
FetchClient resource.FetchClient `json:"fetch_client,omitempty"`
UserAgent ua.UserAgent `json:"user_agent,omitempty"`
Headers MIMEHeader `json:"headers,omitempty"`
Domain string `json:"domain,omitempty"`
Sitename string `json:"sitename,omitempty"`
FetchClient resource.ClientIdentifier `json:"fetch_client,omitempty"`
UserAgent ua.UserAgent `json:"user_agent,omitempty"`
Headers MIMEHeader `json:"headers,omitempty"`
}

// Domain names will be case-folded to lower case.
Expand Down
4 changes: 2 additions & 2 deletions internal/settings/domain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func TestJSONUnmarshal(t *testing.T) {
data string
expectErr bool
expectSitename string
expectFetchClient resource.FetchClient
expectFetchClient resource.ClientIdentifier
expectUserAgent ua.UserAgent
expectHeaders map[string]string
}{
Expand Down Expand Up @@ -90,7 +90,7 @@ func TestJSONMarshal(t *testing.T) {
expectErr bool
expectJSON string
expectSitename string
expectFetchClient resource.FetchClient
expectFetchClient resource.ClientIdentifier
expectUserAgent ua.UserAgent
expectHeaders map[string]string
}{
Expand Down
2 changes: 1 addition & 1 deletion internal/storage/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func (s URLDataStore) Fetch(url *nurl.URL) (*resource.WebPage, error) {
expiryEpoch int64
metadata string
contentText string
fetchMethod resource.FetchClient
fetchMethod resource.ClientIdentifier
)
err = rows.Scan(&canonicalUrl, &parsedUrl, &fetchEpoch, &expiryEpoch, &metadata, &contentText, &fetchMethod)
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions resource/feed.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@ import (
"github.com/mmcdole/gofeed"
)

// Feed embeds gofeed.Feed and adds a RequestedURL field,
// along with the ItemLinks() function.
type Feed struct {
RequestedURL string `json:"requested_url,omitempty"` // Serialized as "requested_url"; omitted from JSON when empty
gofeed.Feed // Embedded, so the gofeed.Feed fields and methods are promoted onto Feed
}

// Returns a slice of links for each item in the feed.
func (f Feed) ItemLinks() []string {
rval := make([]string, len(f.Items))
for i, item := range f.Items {
Expand Down
26 changes: 13 additions & 13 deletions resource/fetch_method.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,50 +7,50 @@ import (
"fmt"
)

type FetchClient int
type ClientIdentifier int

const (
Unspecified FetchClient = iota
Unspecified ClientIdentifier = iota
DefaultClient
HeadlessChromium
)

var fetchMethods = map[FetchClient]string{
var fetchClientNames = map[ClientIdentifier]string{
Unspecified: "unspecified",
DefaultClient: "direct",
HeadlessChromium: "chromium-headless",
}

var ErrNoSuchFetchMethod = errors.New("no such FetchMethod")
var ErrNoSuchFetchMethod = errors.New("no such fetch client identifier")

func (f FetchClient) String() string {
if val, ok := fetchMethods[f]; ok {
func (f ClientIdentifier) String() string {
if val, ok := fetchClientNames[f]; ok {
return val
} else {
return "Unknown"
}
}

func (f *FetchClient) UnmarshalText(data []byte) error {
for k, v := range fetchMethods {
func (f *ClientIdentifier) UnmarshalText(data []byte) error {
for k, v := range fetchClientNames {
if v == string(data) {
*f = k
return nil
}
}
return errors.Join(
fmt.Errorf("invalid FetchMethod %q", string(data)),
fmt.Errorf("invalid name %q", string(data)),
ErrNoSuchFetchMethod,
)
}

func (f FetchClient) MarshalText() ([]byte, error) {
if val, ok := fetchMethods[f]; ok {
func (f ClientIdentifier) MarshalText() ([]byte, error) {
if val, ok := fetchClientNames[f]; ok {
return []byte(val), nil
} else {
return []byte(fetchMethods[Unspecified]),
return []byte(fetchClientNames[Unspecified]),
errors.Join(
fmt.Errorf("invalid FetchMethod %q", int(f)),
fmt.Errorf("invalid name %q", int(f)),
ErrNoSuchFetchMethod,
)
}
Expand Down
14 changes: 7 additions & 7 deletions resource/fetch_method_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
func TestFetchMethodString(t *testing.T) {
tests := []struct {
name string
f FetchClient
f ClientIdentifier
want string
}{
{
Expand Down Expand Up @@ -37,11 +37,11 @@ func TestFetchMethodString(t *testing.T) {

func TestUnmarshal(t *testing.T) {
type container struct {
F FetchClient `json:"fetch_method"`
F ClientIdentifier `json:"fetch_method"`
}
tests := []struct {
input string
expectedValue FetchClient
expectedValue ClientIdentifier
expectError bool
}{
{input: "unspecified", expectedValue: Unspecified},
Expand Down Expand Up @@ -69,13 +69,13 @@ func TestMarshal(t *testing.T) {
expectedValue string
expectError bool
}{
{input: 0, expectedValue: fetchMethods[Unspecified]},
{input: 1, expectedValue: fetchMethods[DefaultClient]},
{input: 2, expectedValue: fetchMethods[HeadlessChromium]},
{input: 0, expectedValue: fetchClientNames[Unspecified]},
{input: 1, expectedValue: fetchClientNames[DefaultClient]},
{input: 2, expectedValue: fetchClientNames[HeadlessChromium]},
{input: -1, expectError: true},
}
for _, test := range tests {
fm := FetchClient(test.input)
fm := ClientIdentifier(test.input)
val, err := fm.MarshalText()
if (err != nil) != test.expectError {
t.Errorf("%q expected error %v, got %v", test.input, test.expectError, err)
Expand Down
1 change: 1 addition & 0 deletions resource/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ var illegalParams = []string{
"utm_brand",
}

// CleanURL removes utm_ parameters from the URL
func CleanURL(url *nurl.URL) *nurl.URL {
if url == nil {
return nil
Expand Down
46 changes: 23 additions & 23 deletions resource/web_page.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,29 +35,29 @@ func NewWebPage(url nurl.URL) *WebPage {
// WebPage represents a web page that was fetched, including metadata from the page itself,
// text content, and information about the fetch operation.
type WebPage struct {
RequestedURL *nurl.URL `json:"-"` // The page that was requested by the caller
CanonicalURL *nurl.URL `json:"-"` // The canonical URL of the page; presumably the page that was actually fetched (TODO confirm)
OriginalURL string `json:"original_url,omitempty"` // NOTE(review): presumably the URL as originally supplied — confirm
TTL time.Duration `json:"-"` // Time to live for the resource
FetchTime *time.Time `json:"fetch_time,omitempty"` // When the returned source was fetched
FetchMethod FetchClient `json:"fetch_method,omitempty"` // Method used to fetch the page
Hostname string `json:"hostname,omitempty"` // Hostname of the page
StatusCode int `json:"status_code,omitempty"` // HTTP status code
Error error `json:"error,omitempty"` // Error that occurred during fetching
Title string `json:"title,omitempty"` // Title of the page
Description string `json:"description,omitempty"` // Description of the page
Sitename string `json:"sitename,omitempty"` // Name of the site
Authors []string `json:"authors,omitempty"` // Authors of the page
Date *time.Time `json:"date,omitempty"` // Date of the page
Categories []string `json:"categories,omitempty"` // Categories of the page
Tags []string `json:"tags,omitempty"` // Tags of the page
Language string `json:"language,omitempty"` // Language of the page
Image string `json:"image,omitempty"` // Image of the page
PageType string `json:"page_type,omitempty"` // Type of the page
License string `json:"license,omitempty"` // License of the page
ID string `json:"id,omitempty"` // ID of the page
Fingerprint string `json:"fingerprint,omitempty"` // Fingerprint of the page
ContentText string `json:"content_text,omitempty"` // Text content of the page
RequestedURL *nurl.URL `json:"-"` // The page that was requested by the caller
CanonicalURL *nurl.URL `json:"-"` // The canonical URL of the page; presumably the page that was actually fetched (TODO confirm)
OriginalURL string `json:"original_url,omitempty"` // NOTE(review): presumably the URL as originally supplied — confirm
TTL time.Duration `json:"-"` // Time to live for the resource
FetchTime *time.Time `json:"fetch_time,omitempty"` // When the returned source was fetched
FetchMethod ClientIdentifier `json:"fetch_method,omitempty"` // Method used to fetch the page
Hostname string `json:"hostname,omitempty"` // Hostname of the page
StatusCode int `json:"status_code,omitempty"` // HTTP status code
Error error `json:"error,omitempty"` // Error that occurred during fetching
Title string `json:"title,omitempty"` // Title of the page
Description string `json:"description,omitempty"` // Description of the page
Sitename string `json:"sitename,omitempty"` // Name of the site
Authors []string `json:"authors,omitempty"` // Authors of the page
Date *time.Time `json:"date,omitempty"` // Date of the page
Categories []string `json:"categories,omitempty"` // Categories of the page
Tags []string `json:"tags,omitempty"` // Tags of the page
Language string `json:"language,omitempty"` // Language of the page
Image string `json:"image,omitempty"` // Image of the page
PageType string `json:"page_type,omitempty"` // Type of the page
License string `json:"license,omitempty"` // License of the page
ID string `json:"id,omitempty"` // ID of the page
Fingerprint string `json:"fingerprint,omitempty"` // Fingerprint of the page
ContentText string `json:"content_text,omitempty"` // Text content of the page
skipMap map[skippable]bool
}

Expand Down
2 changes: 1 addition & 1 deletion resource/web_page_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ func TestExpireTime(t *testing.T) {
func TestFetchMethod(t *testing.T) {
tests := []struct {
name string
f FetchClient
f ClientIdentifier
want string
}{
{
Expand Down

0 comments on commit 4eb322b

Please sign in to comment.