Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add new option SetCustomParseMediaType to customise mediatype parsing #308

Merged
merged 4 commits into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 10 additions & 11 deletions detect.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@ import (
"strings"

inttp "github.com/jhillyerd/enmime/internal/textproto"
"github.com/jhillyerd/enmime/mediatype"
)

// detectMultipartMessage returns true if the message has a recognized multipart Content-Type header
func detectMultipartMessage(root *Part, multipartWOBoundaryAsSinglepart bool) bool {
// Parse top-level multipart
ctype := root.Header.Get(hnContentType)
mtype, params, _, err := mediatype.Parse(ctype)
mtype, params, _, err := root.parseMediaType(ctype)
if err != nil {
return false
}
Expand All @@ -35,27 +34,27 @@ func detectMultipartMessage(root *Part, multipartWOBoundaryAsSinglepart bool) bo
// - Content-Disposition: attachment; filename="frog.jpg"
// - Content-Disposition: inline; filename="frog.jpg"
// - Content-Type: attachment; filename="frog.jpg"
func detectAttachmentHeader(header inttp.MIMEHeader) bool {
mtype, params, _, _ := mediatype.Parse(header.Get(hnContentDisposition))
func detectAttachmentHeader(root *Part, header inttp.MIMEHeader) bool {
mtype, params, _, _ := root.parseMediaType(header.Get(hnContentDisposition))
if strings.ToLower(mtype) == cdAttachment ||
(strings.ToLower(mtype) == cdInline && len(params) > 0) {
return true
}

mtype, _, _, _ = mediatype.Parse(header.Get(hnContentType))
mtype, _, _, _ = root.parseMediaType(header.Get(hnContentType))
return strings.ToLower(mtype) == cdAttachment
}

// detectTextHeader returns true if the MIME headers define a valid 'text/plain' or 'text/html'
// part. If the emptyContentTypeIsText argument is set to true, a missing Content-Type header will
// result in a positive text part detection.
func detectTextHeader(header inttp.MIMEHeader, emptyContentTypeIsText bool) bool {
func detectTextHeader(root *Part, header inttp.MIMEHeader, emptyContentTypeIsText bool) bool {
ctype := header.Get(hnContentType)
if ctype == "" && emptyContentTypeIsText {
return true
}

if mtype, _, _, err := mediatype.Parse(ctype); err == nil {
if mtype, _, _, err := root.parseMediaType(ctype); err == nil {
switch mtype {
case ctTextPlain, ctTextHTML:
return true
Expand All @@ -68,23 +67,23 @@ func detectTextHeader(header inttp.MIMEHeader, emptyContentTypeIsText bool) bool
// detectBinaryBody returns true if the mail header defines a binary body.
func detectBinaryBody(root *Part) bool {
header := inttp.MIMEHeader(root.Header) // Use internal header methods.
if detectTextHeader(header, true) {
if detectTextHeader(root, header, true) {
// It is text/plain, but an attachment.
// Content-Type: text/plain; name="test.csv"
// Content-Disposition: attachment; filename="test.csv"
// Check for attachment only, or inline body is marked
// as attachment, too.
mtype, _, _, _ := mediatype.Parse(header.Get(hnContentDisposition))
mtype, _, _, _ := root.parseMediaType(header.Get(hnContentDisposition))
return strings.ToLower(mtype) == cdAttachment
}

isBin := detectAttachmentHeader(header)
isBin := detectAttachmentHeader(root, header)
if !isBin {
// This must be an attachment, if the Content-Type is not
// 'text/plain' or 'text/html'.
// Example:
// Content-Type: application/pdf; name="doc.pdf"
mtype, _, _, _ := mediatype.Parse(header.Get(hnContentType))
mtype, _, _, _ := root.parseMediaType(header.Get(hnContentType))
mtype = strings.ToLower(mtype)
if mtype != ctTextPlain && mtype != ctTextHTML {
return true
Expand Down
8 changes: 6 additions & 2 deletions detect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,10 @@ func TestDetectAttachmentHeader(t *testing.T) {
},
}

root := &Part{parser: &defaultParser}

for _, s := range htests {
got := detectAttachmentHeader(s.header)
got := detectAttachmentHeader(root, s.header)
if got != s.want {
t.Errorf("detectAttachmentHeader(%v) == %v, want: %v", s.header, got, s.want)
}
Expand Down Expand Up @@ -192,8 +194,10 @@ func TestDetectTextHeader(t *testing.T) {
},
}

root := &Part{parser: &defaultParser}

for _, s := range htests {
got := detectTextHeader(s.header, s.emptyIsPlain)
got := detectTextHeader(root, s.header, s.emptyIsPlain)
if got != s.want {
t.Errorf("detectTextHeader(%v, %v) == %v, want: %v",
s.header, s.emptyIsPlain, got, s.want)
Expand Down
6 changes: 2 additions & 4 deletions envelope.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ import (
"github.com/jaytaylor/html2text"
"github.com/jhillyerd/enmime/internal/coding"
inttp "github.com/jhillyerd/enmime/internal/textproto"
"github.com/jhillyerd/enmime/mediatype"

"github.com/pkg/errors"
)

Expand Down Expand Up @@ -232,7 +230,7 @@ func parseTextOnlyBody(root *Part, e *Envelope) error {
var charset string
var isHTML bool
if ctype := root.Header.Get(hnContentType); ctype != "" {
if mediatype, mparams, _, err := mediatype.Parse(ctype); err == nil {
if mediatype, mparams, _, err := root.parseMediaType(ctype); err == nil {
isHTML = (mediatype == ctTextHTML)
if mparams[hpCharset] != "" {
charset = mparams[hpCharset]
Expand Down Expand Up @@ -271,7 +269,7 @@ func parseTextOnlyBody(root *Part, e *Envelope) error {
func parseMultiPartBody(root *Part, e *Envelope) error {
// Parse top-level multipart
ctype := root.Header.Get(hnContentType)
mediatype, params, _, err := mediatype.Parse(ctype)
mediatype, params, _, err := root.parseMediaType(ctype)
if err != nil {
return fmt.Errorf("unable to parse media type: %v", err)
}
Expand Down
13 changes: 13 additions & 0 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,16 @@ type rawContentOption bool
func (o rawContentOption) apply(p *Parser) {
p.rawContent = bool(o)
}

// SetCustomParseMediaType returns an Option that installs customParseMediaType as the function
// used to parse media type headers in place of the default ParseMediaType. This allows callers to
// accept media types or parameters that would otherwise be considered malformed. When no custom
// function is set (or nil is supplied), parsing is done with ParseMediaType.
func SetCustomParseMediaType(customParseMediaType CustomParseMediaType) Option {
	opt := parseMediaTypeOption(customParseMediaType)
	return opt
}

// parseMediaTypeOption is the option type returned by SetCustomParseMediaType; it carries the
// caller-supplied media type parsing function.
type parseMediaTypeOption CustomParseMediaType

// apply stores the wrapped function on p as its custom media type parser.
func (o parseMediaTypeOption) apply(p *Parser) {
	fn := CustomParseMediaType(o)
	p.customParseMediaType = fn
}
90 changes: 90 additions & 0 deletions options_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package enmime

import (
"fmt"
"strings"
"testing"
)

// TestSetCustomParseMediaType verifies that Part.parseMediaType uses the default parser when no
// custom function is configured, and otherwise delegates to the function installed through
// SetCustomParseMediaType.
func TestSetCustomParseMediaType(t *testing.T) {
	// alwaysReturnHTML ignores its input and always reports text/html without an error.
	alwaysReturnHTML := func(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) {
		return "text/html", nil, nil, nil
	}
	// changeAndUtilizeDefault repairs one specific invalid media type, then defers to the
	// default parser for the actual parsing.
	changeAndUtilizeDefault := func(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) {
		modifiedStr := strings.ReplaceAll(ctype, "application/Pamir Viewer", "application/PamirViewer")
		return ParseMediaType(modifiedStr)
	}

	tcases := []struct {
		name                 string
		ctype                string
		want                 string
		customParseMediaType CustomParseMediaType
	}{
		{
			name:                 "nil custom parser uses default",
			ctype:                "text/plain",
			want:                 "text/plain",
			customParseMediaType: nil,
		},
		{
			name:                 "custom parser overrides media type",
			ctype:                "text/plain",
			want:                 "text/html",
			customParseMediaType: alwaysReturnHTML,
		},
		{
			name:                 "custom parser overrides media type with params",
			ctype:                "text/plain; charset=utf-8",
			want:                 "text/html",
			customParseMediaType: alwaysReturnHTML,
		},
		{
			name:                 "custom parser repairs input and delegates to default",
			ctype:                "application/Pamir Viewer; name=\"2023-384.pmrv\"",
			want:                 "application/pamirviewer",
			customParseMediaType: changeAndUtilizeDefault,
		},
	}

	for _, tcase := range tcases {
		// Subtests run synchronously here, so capturing tcase is safe on every Go version.
		t.Run(tcase.name, func(t *testing.T) {
			p := &Part{parser: NewParser(SetCustomParseMediaType(tcase.customParseMediaType))}

			got, _, _, err := p.parseMediaType(tcase.ctype)
			if err != nil {
				t.Fatalf("Part.parseMediaType(%q) returned unexpected error: %v", tcase.ctype, err)
			}
			if got != tcase.want {
				t.Errorf("Part.parseMediaType(%q) == %q, want: %q",
					tcase.ctype, got, tcase.want)
			}
		})
	}
}

// ExampleSetCustomParseMediaType shows how a custom media type parser can rewrite an invalid
// Content-Type value before handing it to the default ParseMediaType, so that a message
// containing it can still be read with ReadEnvelope.
func ExampleSetCustomParseMediaType() {
// For the sake of simplicity, this only removes the space in one very specific invalid
// content type: "application/Pamir Viewer".
replaceSpecificContentType := func(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) {
modifiedStr := strings.ReplaceAll(ctype, "application/Pamir Viewer", "application/PamirViewer")

return ParseMediaType(modifiedStr)
}

// A multipart message whose single part carries the invalid content type handled above.
invalidMessageContent := `From: <enmime@parser.git>
Content-Type: multipart/mixed;
boundary="----=_NextPart_000_000F_01D9FAC6.09EB3B60"

------=_NextPart_000_000F_01D9FAC6.09EB3B60
Content-Type: application/Pamir Viewer;
name="2023-10-13.pmrv"
Content-Transfer-Encoding: base64
Content-Disposition: attachment;
filename="2023-10-13.pmrv"

f6En7vFpNql3tfMkoKABP1iBEf+M/qF6LCAIvyRbpH6uDCqcKKGmH3e6OiqN5eCfqUk=
`

// Build a parser that uses the custom media type function, then read the message with it.
p := NewParser(SetCustomParseMediaType(replaceSpecificContentType))
e, err := p.ReadEnvelope(strings.NewReader(invalidMessageContent))

// NOTE(review): err is only printed, not checked; if ReadEnvelope can return a nil envelope
// together with an error, the dereferences of e below would panic — confirm.
fmt.Println(err)
fmt.Println(len(e.Attachments))
fmt.Println(e.Attachments[0].ContentType)
fmt.Println(e.Attachments[0].FileName)

// Output:
// <nil>
// 1
// application/pamirviewer
// 2023-10-13.pmrv
}
4 changes: 4 additions & 0 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ func AllowCorruptTextPartErrorPolicy(p *Part, err error) bool {
return false
}

// CustomParseMediaType is the signature of a function that parses the media type header value
// ctype and returns the media type, its parameters, any invalid parameters, and an error. It has
// the same shape as ParseMediaType; see ParseMediaType for more details.
type CustomParseMediaType func(
	ctype string,
) (mtype string, params map[string]string, invalidParams []string, err error)

// Parser parses MIME.
// Default parser is a valid one.
type Parser struct {
Expand All @@ -21,6 +24,7 @@ type Parser struct {
readPartErrorPolicy ReadPartErrorPolicy
skipMalformedParts bool
rawContent bool
customParseMediaType CustomParseMediaType
}

// defaultParser is a Parser with default configuration.
Expand Down
15 changes: 11 additions & 4 deletions part.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ import (
"github.com/gogs/chardet"
"github.com/jhillyerd/enmime/internal/coding"
inttp "github.com/jhillyerd/enmime/internal/textproto"
"github.com/jhillyerd/enmime/mediatype"

"github.com/pkg/errors"
)

Expand Down Expand Up @@ -126,7 +124,7 @@ func (p *Part) setupHeaders(r *bufio.Reader, defaultContentType string) error {
ctype = defaultContentType
}
// Parse Content-Type header.
mtype, mparams, minvalidParams, err := mediatype.Parse(ctype)
mtype, mparams, minvalidParams, err := p.parseMediaType(ctype)
if err != nil {
return err
}
Expand All @@ -149,7 +147,7 @@ func (p *Part) setupHeaders(r *bufio.Reader, defaultContentType string) error {
func (p *Part) setupContentHeaders(mediaParams map[string]string) {
header := inttp.MIMEHeader(p.Header)
// Determine content disposition, filename, character set.
disposition, dparams, _, err := mediatype.Parse(header.Get(hnContentDisposition))
disposition, dparams, _, err := p.parseMediaType(header.Get(hnContentDisposition))
if err == nil {
// Disposition is optional
p.Disposition = disposition
Expand Down Expand Up @@ -327,6 +325,15 @@ func (p *Part) decodeContent(r io.Reader, readPartErrorPolicy ReadPartErrorPolic
return nil
}

// parseMediaType parses ctype with the custom media type parser configured on the part's parser,
// if there is one. When the part has no parser, or the parser has no custom function set, it
// falls back to the default ParseMediaType.
func (p *Part) parseMediaType(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) {
	if p.parser != nil {
		if custom := p.parser.customParseMediaType; custom != nil {
			return custom(ctype)
		}
	}

	return ParseMediaType(ctype)
}

// IsBase64CorruptInputError returns true when err is of type base64.CorruptInputError.
//
// It can be used to create ReadPartErrorPolicy functions.
Expand Down