Skip to content

Commit

Permalink
Serve robots.txt to prevent Google indexing a zillion sample jobs
Browse files Browse the repository at this point in the history
Add a `robots.txt` that allows some access to some of the UI (for the
purposes of the demo), but denies access to `/jobs/` so that Google
doesn't try to index thousands of sample jobs.

There were a number of plausible ways to go about this. I ended up
putting in an embedded file system that can easily pull static files
into the Go program, and which could be reused for future static files
in case they're needed. It may be a little more than we need right now,
but it wasn't much harder to do than serving a one-off static file.
  • Loading branch information
brandur committed Jul 19, 2024
1 parent a93fc41 commit b796423
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 0 deletions.
49 changes: 49 additions & 0 deletions handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package riverui

import (
"context"
"embed"
"errors"
"fmt"
"io/fs"
Expand Down Expand Up @@ -99,6 +100,11 @@ func NewHandler(opts *HandlerOpts) (http.Handler, error) {
apiendpoint.Mount(mux, opts.Logger, &queueResumeEndpoint{apiBundle: apiBundle})
apiendpoint.Mount(mux, opts.Logger, &stateAndCountGetEndpoint{apiBundle: apiBundle})
apiendpoint.Mount(mux, opts.Logger, &workflowGetEndpoint{apiBundle: apiBundle})

if err := mountStaticFiles(opts.Logger, mux); err != nil {
return nil, err
}

mux.HandleFunc("/api", http.NotFound)
mux.Handle("/", intercept404(fileServer, serveIndex))

Expand All @@ -111,6 +117,49 @@ func NewHandler(opts *HandlerOpts) (http.Handler, error) {
return middlewareStack.Mount(mux), nil
}

// publicFS is an embedded filesystem containing the `public` directory.
// mountStaticFiles walks it to mount each contained file as a static route.
//
//go:embed public
var publicFS embed.FS

// mountStaticFiles walks the embedded filesystem in publicFS and mounts each
// file as a GET route on the given serve mux. The leading `public/` is
// stripped from the route, so `public/robots.txt` is served at `/robots.txt`.
// Content type is determined by `http.DetectContentType`.
//
// If a file cannot be read while serving a request, the error is logged and
// the client receives a 500 instead of an empty 200 response.
//
// Returns any error encountered while walking the embedded filesystem.
func mountStaticFiles(logger *slog.Logger, mux *http.ServeMux) error {
	return fs.WalkDir(publicFS, ".", func(path string, dirEntry fs.DirEntry, err error) error {
		if err != nil {
			return fmt.Errorf("walking embedded static files at %q: %w", path, err)
		}

		// Only files get routes; directories are merely traversed.
		if dirEntry.IsDir() {
			return nil
		}

		servePath := strings.TrimPrefix(path, "public/")

		mux.HandleFunc("GET /"+servePath, func(w http.ResponseWriter, r *http.Request) {
			data, err := publicFS.ReadFile(path)
			if err != nil {
				// Nothing has been written yet, so an error status can
				// still be sent rather than an implicit empty 200.
				logger.ErrorContext(r.Context(), "Error reading static file", "err", err, "path", path)
				http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
				return
			}

			w.Header().Set("Content-Type", http.DetectContentType(data))

			// The status line is already committed once writing starts, so a
			// failed write can only be logged.
			if _, err := w.Write(data); err != nil {
				logger.ErrorContext(r.Context(), "Error writing static file", "err", err, "path", path)
			}
		})

		return nil
	})
}

// Go's http.StripPrefix can sometimes result in an empty path. For example,
// when removing a prefix like "/foo" from path "/foo", the result is "". This
// does not get handled by the ServeMux correctly (it results in a redirect to
Expand Down
30 changes: 30 additions & 0 deletions handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,4 +112,34 @@ func TestNewHandlerIntegration(t *testing.T) {
makeAPICall(t, "QueueResume", http.MethodPut, makeURL("/api/queues/%s/resume", queuePaused.Name), nil)
makeAPICall(t, "StateAndCountGet", http.MethodGet, makeURL("/api/states"), nil)
makeAPICall(t, "WorkflowGet", http.MethodGet, makeURL("/api/workflows/%s", workflowID), nil)

//
// Static files
//

makeAPICall(t, "RobotsTxt", http.MethodGet, makeURL("/robots.txt"), nil)
}

// TestMountStaticFiles checks that mountStaticFiles registers the embedded
// robots.txt on a fresh mux and that it's served with a 200 status, a plain
// text content type, and the expected body content.
func TestMountStaticFiles(t *testing.T) {
	t.Parallel()

	logger := riverinternaltest.Logger(t)
	serveMux := http.NewServeMux()

	require.NoError(t, mountStaticFiles(logger, serveMux))

	resp := httptest.NewRecorder()
	serveMux.ServeHTTP(resp, httptest.NewRequest(http.MethodGet, "/robots.txt", nil))

	require.Equal(t, http.StatusOK, resp.Result().StatusCode) //nolint:bodyclose
	require.Equal(t, "text/plain; charset=utf-8", resp.Header().Get("Content-Type"))
	require.Contains(t, resp.Body.String(), "User-Agent")
}
11 changes: 11 additions & 0 deletions internal/apiendpoint/api_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ func executeAPIEndpoint[TReq any, TResp any](w http.ResponseWriter, r *http.Requ
return err
}

if rawExtractor, ok := any(resp).(RawResponder); ok {
return rawExtractor.RespondRaw(w)
}

respData, err := json.Marshal(resp)
if err != nil {
return fmt.Errorf("error marshaling response JSON: %w", err)
Expand Down Expand Up @@ -189,6 +193,13 @@ type RawExtractor interface {
ExtractRaw(r *http.Request) error
}

// RawResponder is an interface that can be implemented by response structs
// that need to write directly to the ResponseWriter instead of emitting the
// normal JSON format. When an endpoint's response value implements it,
// executeAPIEndpoint calls RespondRaw with the request's ResponseWriter and
// returns its error rather than marshaling the response to JSON.
type RawResponder interface {
RespondRaw(w http.ResponseWriter) error
}

// Make some broad categories of internal error back into something public
// facing because in some cases they can be a vast help for debugging.
func maybeInterpretInternalError(err error) error {
Expand Down
2 changes: 2 additions & 0 deletions public/robots.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
User-Agent: *
Disallow: /jobs/

0 comments on commit b796423

Please sign in to comment.