Skip to content

Commit

Permalink
Simplify character encoding handling in raw body reader (#209)
Browse files Browse the repository at this point in the history
It turns out that Go's golang.org/x/net/html/charset package already provides a ready-made solution for decoding HTTP responses to UTF-8. It checks the charset parameter of the Content-Type header and also attempts to infer the encoding from the document's <meta charset> or <meta http-equiv="Content-Type"> tags.
  • Loading branch information
anfragment authored Jan 16, 2025
1 parent 86e02b7 commit 8e17bb6
Showing 1 changed file with 1 addition and 23 deletions.
24 changes: 1 addition & 23 deletions internal/htmlrewrite/rawbody.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"compress/gzip"
"fmt"
"io"
"mime"
"net/http"
"strings"

Expand All @@ -31,13 +30,7 @@ func readRawBody(res *http.Response) ([]byte, error) {
return nil, fmt.Errorf("create decompressed reader: %w", err)
}

contentType := res.Header.Get("Content-Type")
_, params, err := mime.ParseMediaType(contentType)
if err != nil {
res.Body = io.NopCloser(bytes.NewReader(resBytes))
return nil, fmt.Errorf("parse media type: %w", err)
}
decodedReader, err := decodeReader(decompressedReader, params["charset"])
decodedReader, err := charset.NewReader(decompressedReader, res.Header.Get("Content-Type"))
if err != nil {
decompressedReader.Close()
res.Body = io.NopCloser(bytes.NewReader(resBytes))
Expand Down Expand Up @@ -80,18 +73,3 @@ func decompressReader(reader io.Reader, compressionAlg string) (io.ReadCloser, e
return nil, fmt.Errorf("unsupported encoding %q", compressionAlg)
}
}

// decodeReader wraps reader so that it yields UTF-8, converting from the
// character encoding named by encoding (for example the charset parameter of
// a Content-Type header).
//
// An empty name, "utf-8" and "us-ascii" (compared case-insensitively) need no
// conversion, so reader is returned unchanged. Any other name is resolved via
// charset.Lookup; if the lookup yields no encoding, an error naming the
// requested charset is returned.
func decodeReader(reader io.Reader, encoding string) (io.Reader, error) {
	// Reference: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type#media-type
	switch strings.ToLower(encoding) {
	case "utf-8", "us-ascii", "":
		return reader, nil
	default:
		// Keep the lookup result under its own name: shadowing the encoding
		// parameter here would make the error below format a nil
		// encoding.Encoding instead of the charset name that was requested.
		enc, _ := charset.Lookup(encoding)
		if enc == nil {
			return nil, fmt.Errorf("unsupported charset %q", encoding)
		}
		return enc.NewDecoder().Reader(reader), nil
	}
}

0 comments on commit 8e17bb6

Please sign in to comment.