diff --git a/options.go b/options.go index b7e6fd2..8b3104a 100644 --- a/options.go +++ b/options.go @@ -100,3 +100,15 @@ func (o disableTextConversionOption) apply(p *Parser) { func DisableTextConversion(disableTextConversion bool) Option { return disableTextConversionOption(disableTextConversion) } + +type disableCharacterDetectionOption bool + +func (o disableCharacterDetectionOption) apply(p *Parser) { + p.disableCharacterDetection = bool(o) +} + +// DisableCharacterDetection sets the disableCharacterDetection option. When true, a part that declares a +// character set always uses that declared set; detection is consulted only when the part declares none. +func DisableCharacterDetection(disableCharacterDetection bool) Option { + return disableCharacterDetectionOption(disableCharacterDetection) +} diff --git a/parser.go b/parser.go index 2ca6939..9e57124 100644 --- a/parser.go +++ b/parser.go @@ -27,6 +27,7 @@ type Parser struct { customParseMediaType CustomParseMediaType stripMediaTypeInvalidCharacters bool disableTextConversion bool + disableCharacterDetection bool } // defaultParser is a Parser with default configuration. diff --git a/part.go b/part.go index 5aa9e37..579f015 100644 --- a/part.go +++ b/part.go @@ -210,7 +210,8 @@ func (p *Part) convertFromDetectedCharset(r io.Reader, readPartErrorPolicy ReadP // Restore r. r = bytes.NewReader(buf) - if cs == nil || cs.Confidence < minCharsetConfidence || len(bytes.Runes(buf)) < minCharsetRuneLength { + if (p.parser.disableCharacterDetection && p.Charset != "") || + (cs == nil || cs.Confidence < minCharsetConfidence || len(bytes.Runes(buf)) < minCharsetRuneLength) { // Low confidence or not enough characters, use declared character set. 
return p.convertFromStatedCharset(r), nil } diff --git a/part_test.go b/part_test.go index 8a7774e..4be0d59 100644 --- a/part_test.go +++ b/part_test.go @@ -1318,3 +1318,28 @@ func TestCtypeInvalidCharacters(t *testing.T) { test.ComparePart(t, p, wantp) } + +func TestDisableCharacterDetectionPart(t *testing.T) { + var wantp *enmime.Part + + // chardet considers this test file to be ISO-8859-1, but its header declares utf-8; with detection disabled the declared charset must be kept. + r := test.OpenTestData("parts", "chardet-detection.raw") + parser := enmime.NewParser(enmime.DisableCharacterDetection(true)) + p, err := parser.ReadParts(r) + + // The parse must succeed and yield a non-nil root part. + if err != nil { + t.Fatalf("Unexpected parse error: %+v", err) + } + if p == nil { + t.Fatal("Root node should not be nil") + } + + wantp = &enmime.Part{ + ContentType: "text/plain", + PartID: "0", + Charset: "utf-8", + } + + test.ComparePart(t, p, wantp) +} diff --git a/testdata/parts/chardet-detection.raw b/testdata/parts/chardet-detection.raw new file mode 100644 index 0000000..351d9af --- /dev/null +++ b/testdata/parts/chardet-detection.raw @@ -0,0 +1,8 @@ +Content-Type: text/plain; charset=utf-8 +Content-Transfer-Encoding: quoted-printable + +Loggen Sie sich ein, um die Einladung zu akzeptieren oder geben Sie den fol= +gen1233 + +Nachricht: +=C3=B6o=C3=B6o