diff --git a/imap/envelope.go b/imap/envelope.go
index 4de47adf..8a7b8845 100644
--- a/imap/envelope.go
+++ b/imap/envelope.go
@@ -1,10 +1,10 @@
package imap
import (
+ "github.com/ProtonMail/gluon/rfc5322"
"net/mail"
"strings"
- "github.com/ProtonMail/gluon/internal/parser"
"github.com/ProtonMail/gluon/rfc822"
"github.com/sirupsen/logrus"
)
@@ -29,27 +29,24 @@ func envelope(header *rfc822.Header, c *paramList, writer parListWriter) error {
addString(writer, header.Get("Date")).
addString(writer, header.Get("Subject"))
- addressParser := parser.NewRFC5322AddressListParser()
- defer addressParser.Close()
-
if v, ok := header.GetChecked("From"); !ok {
fields.addString(writer, "")
} else {
- fields.addAddresses(writer, tryParseAddressList(addressParser, v))
+ fields.addAddresses(writer, tryParseAddressList(v))
}
if v, ok := header.GetChecked("Sender"); ok {
- fields.addAddresses(writer, tryParseAddressList(addressParser, v))
+ fields.addAddresses(writer, tryParseAddressList(v))
} else if v, ok := header.GetChecked("From"); ok {
- fields.addAddresses(writer, tryParseAddressList(addressParser, v))
+ fields.addAddresses(writer, tryParseAddressList(v))
} else {
fields.addString(writer, "")
}
if v, ok := header.GetChecked("Reply-To"); ok {
- fields.addAddresses(writer, tryParseAddressList(addressParser, v))
+ fields.addAddresses(writer, tryParseAddressList(v))
} else if v, ok := header.GetChecked("From"); ok {
- fields.addAddresses(writer, tryParseAddressList(addressParser, v))
+ fields.addAddresses(writer, tryParseAddressList(v))
} else {
fields.addString(writer, "")
}
@@ -57,19 +54,19 @@ func envelope(header *rfc822.Header, c *paramList, writer parListWriter) error {
if v, ok := header.GetChecked("To"); !ok {
fields.addString(writer, "")
} else {
- fields.addAddresses(writer, tryParseAddressList(addressParser, v))
+ fields.addAddresses(writer, tryParseAddressList(v))
}
if v, ok := header.GetChecked("Cc"); !ok {
fields.addString(writer, "")
} else {
- fields.addAddresses(writer, tryParseAddressList(addressParser, v))
+ fields.addAddresses(writer, tryParseAddressList(v))
}
if v, ok := header.GetChecked("Bcc"); !ok {
fields.addString(writer, "")
} else {
- fields.addAddresses(writer, tryParseAddressList(addressParser, v))
+ fields.addAddresses(writer, tryParseAddressList(v))
}
fields.addString(writer, header.Get("In-Reply-To"))
@@ -79,8 +76,8 @@ func envelope(header *rfc822.Header, c *paramList, writer parListWriter) error {
return nil
}
-func tryParseAddressList(parser *parser.RFC5322AddressListParser, val string) []*mail.Address {
- addr, err := parser.Parse(val)
+func tryParseAddressList(val string) []*mail.Address {
+ addr, err := rfc5322.ParseAddressList(val)
if err != nil {
logrus.WithError(err).Error("Failed to parse address")
return []*mail.Address{{Name: val}}
diff --git a/rfc5322/address.go b/rfc5322/address.go
new file mode 100644
index 00000000..8895eaf2
--- /dev/null
+++ b/rfc5322/address.go
@@ -0,0 +1,554 @@
+package rfc5322
+
+import (
+ "net/mail"
+
+ "github.com/ProtonMail/gluon/rfcparser"
+)
+
+// 3.4. Address Specification
+
+// parseAddressList parses an address-list and returns every address it contains, with group members
+// flattened into the returned slice.
+//
+// Besides the RFC 5322 `,` separator this implementation also accepts `;` between addresses. Leading
+// separators (optionally surrounded by CFWS) are skipped before the first address is parsed.
+func parseAddressList(p *Parser) ([]*mail.Address, error) {
+ // address-list = (address *("," address)) / obs-addr-list
+ // *([CFWS] ",") address *("," [address / CFWS])
+ // We extended this rule to allow ';' as separator
+ var result []*mail.Address
+
+ // isSep reports whether the token is one of the accepted list separators.
+ isSep := func(tokenType rfcparser.TokenType) bool {
+ return tokenType == rfcparser.TokenTypeComma || tokenType == rfcparser.TokenTypeSemicolon
+ }
+
+ // *([CFWS] ",")
+ for {
+ if _, err := tryParseCFWS(p.parser); err != nil {
+ return nil, err
+ }
+
+ if ok, err := p.parser.MatchesWith(isSep); err != nil {
+ return nil, err
+ } else if !ok {
+ break
+ }
+ }
+
+ // Tracks whether the most recently parsed address was a group that consumed its own ';'.
+ var groupConsumedSemiColon bool
+ // Address
+ {
+ addr, gConsumedSemiColon, err := parseAddress(p)
+ if err != nil {
+ return nil, err
+ }
+
+ groupConsumedSemiColon = gConsumedSemiColon
+
+ result = append(result, addr...)
+ }
+
+ // *("," [address / CFWS])
+ for {
+ if ok, err := p.parser.MatchesWith(isSep); err != nil {
+ return nil, err
+ } else if !ok { // see `parseAddress` comment about why this is necessary.
+ // Without a separator we only keep going when the previous group already consumed a ';'
+ // (which doubles as the separator) and there is still input left.
+ if !groupConsumedSemiColon || p.parser.CurrentToken().TType == rfcparser.TokenTypeEOF {
+ break
+ }
+ }
+
+ if ok, err := tryParseCFWS(p.parser); err != nil {
+ return nil, err
+ } else if ok {
+ // Only continue if the next input is EOF or comma or we can run into issues with parsing
+ // the `',' address` rules.
+ if p.parser.Check(rfcparser.TokenTypeEOF) || p.parser.CheckWith(isSep) {
+ continue
+ }
+ }
+
+ // address
+ addr, consumedSemiColon, err := parseAddress(p)
+ if err != nil {
+ return nil, err
+ }
+
+ groupConsumedSemiColon = consumedSemiColon
+
+ result = append(result, addr...)
+ }
+
+ return result, nil
+}
+
+// The boolean parameter represents whether a group consumed a ';' separator. This is necessary to disambiguate
+// an address list where we have the sequence ` g:
;` since we also allow groups to have optional
+// `;` terminators.
+func parseAddress(p *Parser) ([]*mail.Address, bool, error) {
+ // address = mailbox / group
+ // name-addr = [display-name] angle-addr
+ // group = display-name ":" [group-list] ";" [CFWS]
+ //
+ if _, err := tryParseCFWS(p.parser); err != nil {
+ return nil, false, err
+ }
+
+ // check addr-spec standalone
+ if p.parser.Check(rfcparser.TokenTypeLess) {
+ addr, err := parseAngleAddr(p.parser)
+ if err != nil {
+ return nil, false, err
+ }
+
+ return []*mail.Address{{
+ Name: "",
+ Address: addr,
+ }}, false, nil
+ }
+
+ parserState := p.SaveState()
+
+ if address, err := parseMailbox(p); err == nil {
+ return []*mail.Address{
+ address,
+ }, false, nil
+ }
+
+ p.RestoreState(parserState)
+
+ group, didConsumeSemicolon, err := parseGroup(p)
+ if err != nil {
+ return nil, false, err
+ }
+
+ return group, didConsumeSemicolon, nil
+}
+
+// parseGroup parses a `group` and returns the mailboxes listed inside it (possibly none).
+//
+// The boolean result reports whether a ';' terminator was consumed. Compared to RFC 5322 the rule is
+// relaxed: the terminating ';' is optional and the whole group may be wrapped in double quotes.
+func parseGroup(p *Parser) ([]*mail.Address, bool, error) {
+ // nolint:dupword
+ // group = display-name ":" [group-list] ";" [CFWS]
+ // group-list = mailbox-list / CFWS / obs-group-list
+ // obs-group-list = 1*([CFWS] ",") [CFWS]
+ //
+ // nolint:dupword
+ // mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
+ // obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS])
+ //
+ // This version has been relaxed so that the ';' is optional. and that a group can be wrapped in `"`
+ // Remember whether an opening quote was consumed so the matching closing quote can be required below.
+ hasQuotes, err := p.parser.Matches(rfcparser.TokenTypeDQuote)
+ if err != nil {
+ return nil, false, err
+ }
+
+ // The display name is only validated here; its value is not part of the result.
+ if _, err := parseDisplayName(p.parser); err != nil {
+ return nil, false, err
+ }
+
+ if err := p.parser.Consume(rfcparser.TokenTypeColon, "expected ':' for group start"); err != nil {
+ return nil, false, err
+ }
+
+ var didConsumeSemicolon bool
+
+ var result []*mail.Address
+
+ // An immediate ';' means the group is empty.
+ if ok, err := p.parser.Matches(rfcparser.TokenTypeSemicolon); err != nil {
+ return nil, false, err
+ } else if !ok {
+
+ // *([CFWS] ",")
+ for {
+ if _, err := tryParseCFWS(p.parser); err != nil {
+ return nil, false, err
+ }
+
+ if ok, err := p.parser.Matches(rfcparser.TokenTypeComma); err != nil {
+ return nil, false, err
+ } else if !ok {
+ break
+ }
+ }
+
+ // Mailbox
+ var parsedFirstMailbox bool
+
+ {
+ // The first mailbox is optional: on failure the parser is rewound and the error is dropped.
+ parserState := p.SaveState()
+ mailbox, err := parseMailbox(p)
+ if err != nil {
+ p.RestoreState(parserState)
+ } else {
+ parsedFirstMailbox = true
+ result = append(result, mailbox)
+ }
+ }
+
+ // *("," [mailbox / CFWS])
+ if parsedFirstMailbox {
+ for {
+ if ok, err := p.parser.Matches(rfcparser.TokenTypeComma); err != nil {
+ return nil, false, err
+ } else if !ok {
+ break
+ }
+
+ if ok, err := tryParseCFWS(p.parser); err != nil {
+ return nil, false, err
+ } else if ok {
+ continue
+ }
+
+ // Mailbox
+ mailbox, err := parseMailbox(p)
+ if err != nil {
+ return nil, false, err
+ }
+
+ result = append(result, mailbox)
+ }
+ }
+
+ // The terminating ';' is optional; record whether it was present.
+ consumedSemicolon, err := p.parser.Matches(rfcparser.TokenTypeSemicolon)
+ if err != nil {
+ return nil, false, err
+ }
+
+ didConsumeSemicolon = consumedSemicolon
+ } else {
+ didConsumeSemicolon = true
+ }
+
+ if _, err := tryParseCFWS(p.parser); err != nil {
+ return nil, false, err
+ }
+
+ if hasQuotes {
+ if err := p.parser.Consume(rfcparser.TokenTypeDQuote, `expected '"' for group end`); err != nil {
+ return nil, false, err
+ }
+ }
+
+ return result, didConsumeSemicolon, nil
+}
+
+// parseMailbox parses a `mailbox`, which is either a name-addr or a bare addr-spec.
+//
+// name-addr is attempted first. When it fails the parser is rewound and the input is read again as an
+// addr-spec, in which case the returned address carries no name.
+func parseMailbox(p *Parser) (*mail.Address, error) {
+	// mailbox = name-addr / addr-spec
+	checkpoint := p.SaveState()
+
+	nameAddr, nameAddrErr := parseNameAddr(p.parser)
+	if nameAddrErr == nil {
+		return nameAddr, nil
+	}
+
+	p.RestoreState(checkpoint)
+
+	spec, err := parseAddrSpec(p.parser)
+	if err != nil {
+		return nil, err
+	}
+
+	return &mail.Address{Address: spec}, nil
+}
+
+func parseNameAddr(p *rfcparser.Parser) (*mail.Address, error) {
+ // name-addr = [display-name] angle-addr
+ if _, err := tryParseCFWS(p); err != nil {
+ return nil, err
+ }
+
+ // Only has angle-addr component.
+ if p.Check(rfcparser.TokenTypeLess) {
+ address, err := parseAngleAddr(p)
+ if err != nil {
+ return nil, err
+ }
+
+ return &mail.Address{Address: address}, nil
+ }
+
+ displayName, err := parseDisplayName(p)
+ if err != nil {
+ return nil, err
+ }
+
+ address, err := parseAngleAddr(p)
+ if err != nil {
+ return nil, err
+ }
+
+ return &mail.Address{Address: address, Name: displayName}, nil
+}
+
+// parseAngleAddr parses an `angle-addr` (including the obsolete routed form) and returns the addr-spec
+// found between '<' and '>'.
+//
+// An obs-route prefix, if present, is parsed and discarded. As an extension, an empty `<>` is accepted
+// and yields an empty string.
+func parseAngleAddr(p *rfcparser.Parser) (string, error) {
+ // angle-addr = [CFWS] "<" addr-spec ">" [CFWS] /
+ // obs-angle-addr
+ //
+ // obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS]
+ //
+ // obs-route = obs-domain-list ":"
+ //
+ // obs-domain-list = *(CFWS / ",") "@" domain
+ // *("," [CFWS] ["@" domain])
+ //
+ // This version has been extended so that addr-spec is optional
+ if _, err := tryParseCFWS(p); err != nil {
+ return "", err
+ }
+
+ if err := p.Consume(rfcparser.TokenTypeLess, "expected < for angle-addr start"); err != nil {
+ return "", err
+ }
+
+ // Extension: `<>` is accepted as an empty address.
+ if ok, err := p.Matches(rfcparser.TokenTypeGreater); err != nil {
+ return "", err
+ } else if ok {
+ return "", nil
+ }
+
+ // *(CFWS / ",") at the start of a possible obs-domain-list.
+ for {
+ if ok, err := tryParseCFWS(p); err != nil {
+ return "", err
+ } else if !ok {
+ if ok, err := p.Matches(rfcparser.TokenTypeComma); err != nil {
+ return "", err
+ } else if !ok {
+ break
+ }
+ }
+ }
+
+ // A leading '@' starts an obs-route; its domains are parsed but their values are discarded.
+ if ok, err := p.Matches(rfcparser.TokenTypeAt); err != nil {
+ return "", err
+ } else if ok {
+ if _, err := parseDomain(p); err != nil {
+ return "", err
+ }
+
+ // *("," [CFWS] ["@" domain])
+ for {
+ if ok, err := p.Matches(rfcparser.TokenTypeComma); err != nil {
+ return "", err
+ } else if !ok {
+ break
+ }
+
+ if _, err := tryParseCFWS(p); err != nil {
+ return "", err
+ }
+
+ if ok, err := p.Matches(rfcparser.TokenTypeAt); err != nil {
+ return "", err
+ } else if ok {
+ if _, err := parseDomain(p); err != nil {
+ return "", err
+ }
+ }
+ }
+
+ if err := p.Consume(rfcparser.TokenTypeColon, "expected ':' for obs-route end"); err != nil {
+ return "", err
+ }
+ }
+
+ addr, err := parseAddrSpec(p)
+ if err != nil {
+ return "", err
+ }
+
+ if err := p.Consume(rfcparser.TokenTypeGreater, "expected > for angle-addr end"); err != nil {
+ return "", err
+ }
+
+ if _, err := tryParseCFWS(p); err != nil {
+ return "", err
+ }
+
+ return addr, nil
+}
+
+// parseDisplayName parses a `display-name`, which is simply a phrase, and returns its words joined
+// into a single string by joinWithSpacingRules.
+func parseDisplayName(p *rfcparser.Parser) (string, error) {
+	// display-name = phrase
+	words, phraseErr := parsePhrase(p)
+	if phraseErr != nil {
+		return "", phraseErr
+	}
+
+	name := joinWithSpacingRules(words)
+
+	return name, nil
+}
+
+// parseAddrSpec parses an `addr-spec` and returns it as "local-part@domain".
+//
+// As an extension, a ':' followed by an atom after the domain is accepted as a port and appended to
+// the result as ":port".
+func parseAddrSpec(p *rfcparser.Parser) (string, error) {
+	// addr-spec = local-part "@" domain
+	// This version adds an optional port extension: COLON ATOM
+	local, err := parseLocalPart(p)
+	if err != nil {
+		return "", err
+	}
+
+	if err := p.Consume(rfcparser.TokenTypeAt, "expected @ after local-part"); err != nil {
+		return "", err
+	}
+
+	host, err := parseDomain(p)
+	if err != nil {
+		return "", err
+	}
+
+	hasPort, err := p.Matches(rfcparser.TokenTypeColon)
+	if err != nil {
+		return "", err
+	}
+
+	if !hasPort {
+		return local + "@" + host, nil
+	}
+
+	port, err := parseAtom(p)
+	if err != nil {
+		return "", err
+	}
+
+	return local + "@" + host + ":" + port.String.Value, nil
+}
+
+// parseLocalPart parses the `local-part` of an addr-spec as one or more words separated by '.' and
+// returns them joined by joinWithSpacingRules. Each '.' is recorded as an unspaced element so the
+// joined result keeps the dots between the words.
+func parseLocalPart(p *rfcparser.Parser) (string, error) {
+ // nolint:dupword
+ // local-part = dot-atom / quoted-string / obs-local-part
+ // obs-local-part = word *("." word)
+ // word = atom / quoted-string
+ // ^ above rule can be relaxed into just the last part, dot-atom just
+ // Local part extended
+ var words []parserString
+
+ // First word is mandatory.
+ {
+ word, err := parseWord(p)
+ if err != nil {
+ return "", err
+ }
+
+ words = append(words, word)
+ }
+
+ // *("." word)
+ for {
+ if ok, err := p.Matches(rfcparser.TokenTypePeriod); err != nil {
+ return "", err
+ } else if !ok {
+ break
+ }
+
+ // Keep the '.' itself, positioned at the offset of the token that was just matched.
+ words = append(words, parserString{
+ String: rfcparser.String{
+ Value: ".",
+ Offset: p.PreviousToken().Offset,
+ },
+ Type: parserStringTypeUnspaced,
+ })
+
+ word, err := parseWord(p)
+ if err != nil {
+ return "", err
+ }
+
+ words = append(words, word)
+ }
+
+ return joinWithSpacingRules(words), nil
+}
+
+// parseDomain parses a `domain`, which is either a bracketed domain-literal or a dot-atom, and
+// returns its textual value.
+func parseDomain(p *rfcparser.Parser) (string, error) {
+	// domain = dot-atom / domain-literal / obs-domain
+	//
+	// obs-domain = atom *("." atom)
+	if _, err := tryParseCFWS(p); err != nil {
+		return "", err
+	}
+
+	isLiteral, err := p.Matches(rfcparser.TokenTypeLBracket)
+
+	switch {
+	case err != nil:
+		return "", err
+	case isLiteral:
+		// The opening '[' has already been consumed by the Matches call above.
+		return parseDomainLiteral(p)
+	}
+
+	// obs-domain can be seen as a more restrictive dot-atom, so the dot-atom rule covers both.
+	name, err := parseDotAtom(p)
+	if err != nil {
+		return "", err
+	}
+
+	return name.Value, nil
+}
+
+// parseDomainLiteral parses the remainder of a `domain-literal` and returns it including the
+// surrounding brackets, e.g. "[10.0.0.1]".
+//
+// The caller must already have consumed the leading [CFWS] and the opening '['. Folding white space
+// between dtext characters is skipped and does not appear in the result.
+func parseDomainLiteral(p *rfcparser.Parser) (string, error) {
+	// domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
+	//
+	// [CFWS] and "[" consumed before entry
+	result := []byte{'['}
+
+	// *([FWS] dtext)
+	for {
+		if _, err := tryParseFWS(p); err != nil {
+			return "", err
+		}
+
+		if ok, err := p.MatchesWith(isDText); err != nil {
+			return "", err
+		} else if !ok {
+			break
+		}
+
+		result = append(result, p.PreviousToken().Value)
+	}
+
+	if _, err := tryParseFWS(p); err != nil {
+		return "", err
+	}
+
+	if err := p.Consume(rfcparser.TokenTypeRBracket, "expected ] for domain-literal end"); err != nil {
+		return "", err
+	}
+
+	result = append(result, ']')
+
+	if _, err := tryParseCFWS(p); err != nil {
+		return "", err
+	}
+
+	return string(result), nil
+}
+
+// isDText reports whether the token may appear inside a domain-literal.
+//
+// Control characters, '[', ']' and '\' are rejected, and so are the EOF and Error tokens: they are
+// not characters at all, and the other predicates in this package (isEncodedText, isVChar, isCText)
+// reject them as well. Accepting them would let an unterminated literal match end-of-input as
+// content.
+func isDText(tokenType rfcparser.TokenType) bool {
+	// dtext = %d33-90 /   ; Printable US-ASCII
+	//         %d94-126 /  ; characters not including
+	//         obs-dtext   ; "[", "]", or "\"
+	//
+	// obs-dtext = obs-NO-WS-CTL / quoted-pair // <- we have not included this
+	if rfcparser.IsCTL(tokenType) {
+		return false
+	}
+
+	switch tokenType { //nolint:exhaustive
+	case rfcparser.TokenTypeEOF,
+		rfcparser.TokenTypeError,
+		rfcparser.TokenTypeLBracket,
+		rfcparser.TokenTypeRBracket,
+		rfcparser.TokenTypeBackslash:
+		return false
+	default:
+		return true
+	}
+}
+
+// joinWithSpacingRules concatenates the values in v, inserting a single space between two
+// neighbours depending on their types:
+//
+//   - after an encoded string, a space is inserted only when the next string is of type "other";
+//   - otherwise a space is inserted unless either neighbour is of type "unspaced".
+//
+// An empty v yields an empty string.
+func joinWithSpacingRules(v []parserString) string {
+	// Guard the v[0] access below, which would panic on an empty slice.
+	if len(v) == 0 {
+		return ""
+	}
+
+	result := v[0].String.Value
+
+	prevStrType := v[0].Type
+
+	for i := 1; i < len(v); i++ {
+		curStrType := v[i].Type
+
+		if prevStrType == parserStringTypeEncoded {
+			if curStrType == parserStringTypeOther {
+				result += " "
+			}
+		} else if prevStrType != parserStringTypeUnspaced {
+			if curStrType != parserStringTypeUnspaced {
+				result += " "
+			}
+		}
+
+		prevStrType = curStrType
+
+		result += v[i].String.Value
+	}
+
+	return result
+}
diff --git a/rfc5322/address_test.go b/rfc5322/address_test.go
new file mode 100644
index 00000000..5021e9a8
--- /dev/null
+++ b/rfc5322/address_test.go
@@ -0,0 +1,52 @@
+package rfc5322
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+// TestParseAddrSpec checks that parseAddrSpec returns the bare address for each input, with comments
+// and surrounding white space removed.
+func TestParseAddrSpec(t *testing.T) {
+	cases := map[string]string{
+		`pete(his account)@silly.test(his host)`: `pete@silly.test`,
+		`jdoe@machine.example`:                   `jdoe@machine.example`,
+		`john.q.public@example.com`:              `john.q.public@example.com`,
+		`user@example.com`:                       `user@example.com`,
+		`user@[10.0.0.1]`:                        `user@[10.0.0.1]`,
+		`hořejšek@mail.com `:                     `hořejšek@mail.com`,
+	}
+
+	for input, expected := range cases {
+		t.Run(input, func(t *testing.T) {
+			parser := newTestRFCParser(input)
+
+			got, err := parseAddrSpec(parser)
+			require.NoError(t, err)
+			require.Equal(t, expected, got)
+		})
+	}
+}
+
+// TestParseAngleAddr checks that parseAngleAddr returns the addr-spec enclosed in '<' and '>',
+// dropping comments and any obsolete route prefix.
+//
+// The first six inputs are the TestParseAddrSpec inputs wrapped in angle brackets. Every key must be
+// distinct, because duplicate constant keys in a map literal do not compile.
+func TestParseAngleAddr(t *testing.T) {
+	inputs := map[string]string{
+		`<pete(his account)@silly.test(his host)>`: `pete@silly.test`,
+		`<jdoe@machine.example>`:                   `jdoe@machine.example`,
+		`<john.q.public@example.com>`:              `john.q.public@example.com`,
+		`<user@example.com>`:                       `user@example.com`,
+		`<user@[10.0.0.1]>`:                        `user@[10.0.0.1]`,
+		`<hořejšek@mail.com>`:                      `hořejšek@mail.com`,
+		`<@foo.com:foo@bar.com>`:                   `foo@bar.com`,
+		`<,@foo.com:foo@bar.com>`:                  `foo@bar.com`,
+		`< @foo.com:foo@bar.com>`:                  `foo@bar.com`,
+		`<@foo.com,@bar.bar:foo@bar.com>`:          `foo@bar.com`,
+		"<@foo.com,\r\n @bar.bar:foo@bar.com>":     `foo@bar.com`,
+	}
+
+	for i, e := range inputs {
+		t.Run(i, func(t *testing.T) {
+			p := newTestRFCParser(i)
+			v, err := parseAngleAddr(p)
+			require.NoError(t, err)
+			require.Equal(t, e, v)
+		})
+	}
+}
diff --git a/rfc5322/atom.go b/rfc5322/atom.go
new file mode 100644
index 00000000..e4e7b46f
--- /dev/null
+++ b/rfc5322/atom.go
@@ -0,0 +1,318 @@
+package rfc5322
+
+// 3.2.4. Quoted Strings
+
+import (
+ "fmt"
+ "io"
+ "mime"
+
+ "github.com/ProtonMail/gluon/rfcparser"
+)
+
+func parseDotAtom(p *rfcparser.Parser) (rfcparser.String, error) {
+ // dot-atom = [CFWS] dot-atom-text [CFWS]
+ if _, err := tryParseCFWS(p); err != nil {
+ return rfcparser.String{}, err
+ }
+
+ atom, err := parseDotAtomText(p)
+ if err != nil {
+ return rfcparser.String{}, err
+ }
+
+ if _, err := tryParseCFWS(p); err != nil {
+ return rfcparser.String{}, err
+ }
+
+ return atom, nil
+}
+
+// parseDotAtomText parses `dot-atom-text`: runs of atext characters separated by single '.'.
+//
+// As an extension a trailing '.' is accepted and kept in the result. Two consecutive '.' are
+// rejected with an error.
+func parseDotAtomText(p *rfcparser.Parser) (rfcparser.String, error) {
+ // dot-atom-text = 1*atext *("." 1*atext)
+ // This version has been extended to allow for a trailing '.'.
+ if err := p.ConsumeWith(isAText, "expected atext char for dot-atom-text"); err != nil {
+ return rfcparser.String{}, err
+ }
+
+ atom, err := p.CollectBytesWhileMatchesWithPrevWith(isAText)
+ if err != nil {
+ return rfcparser.String{}, err
+ }
+
+ // *("." 1*atext)
+ for {
+ if ok, err := p.Matches(rfcparser.TokenTypePeriod); err != nil {
+ return rfcparser.String{}, err
+ } else if !ok {
+ break
+ }
+
+ atom.Value = append(atom.Value, '.')
+
+ // ".." is never valid.
+ if p.Check(rfcparser.TokenTypePeriod) {
+ return rfcparser.String{}, p.MakeError("invalid token after '.'")
+ }
+
+ // Early exit to allow trailing '.'
+ if !p.CheckWith(isAText) {
+ break
+ }
+
+ if err := p.ConsumeWith(isAText, "expected atext char for dot-atom-text"); err != nil {
+ return rfcparser.String{}, err
+ }
+
+ atomNext, err := p.CollectBytesWhileMatchesWithPrevWith(isAText)
+ if err != nil {
+ return rfcparser.String{}, err
+ }
+
+ atom.Value = append(atom.Value, atomNext.Value...)
+ }
+
+ return atom.IntoString(), nil
+}
+
+func parseAtom(p *rfcparser.Parser) (parserString, error) {
+ // atom = [CFWS] 1*atext [CFWS]
+ if _, err := tryParseCFWS(p); err != nil {
+ return parserString{}, err
+ }
+
+ if err := p.ConsumeWith(isAText, "expected atext char for atom"); err != nil {
+ return parserString{}, err
+ }
+
+ atom, err := p.CollectBytesWhileMatchesWithPrevWith(isAText)
+ if err != nil {
+ return parserString{}, err
+ }
+
+ if _, err := tryParseCFWS(p); err != nil {
+ return parserString{}, err
+ }
+
+ return parserString{
+ String: atom.IntoString(),
+ Type: parserStringTypeOther,
+ }, nil
+}
+
+// CharsetReader is installed as the CharsetReader of the mime.WordDecoder that parseEncodedAtom uses
+// to decode encoded-words.
+var CharsetReader func(charset string, input io.Reader) (io.Reader, error)
+
+// parseEncodedAtom parses an encoded-word ("=?charset?encoding?text?=") optionally surrounded by
+// CFWS and returns its decoded value tagged as parserStringTypeEncoded.
+//
+// The word is re-assembled piece by piece into fullWord (with the encoding letter normalised to upper
+// case) and then decoded with a mime.WordDecoder that uses the package-level CharsetReader. A decode
+// failure is reported at the offset where the word started.
+func parseEncodedAtom(p *rfcparser.Parser) (parserString, error) {
+ // encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
+ //
+ // charset = token ; see section 3
+ //
+ // encoding = token ; see section 4
+ //
+ //
+ if _, err := tryParseCFWS(p); err != nil {
+ return parserString{}, err
+ }
+
+ var fullWord string
+
+ // Remember where the word starts so errors and the result can point at it.
+ startOffset := p.CurrentToken().Offset
+
+ if err := p.ConsumeBytesFold('=', '?'); err != nil {
+ return parserString{}, err
+ }
+
+ fullWord += "=?"
+
+ charset, err := p.CollectBytesWhileMatchesWith(isEncodedAtomToken)
+ if err != nil {
+ return parserString{}, err
+ }
+
+ fullWord += charset.IntoString().Value
+
+ if err := p.Consume(rfcparser.TokenTypeQuestion, "expected '?' after encoding charset"); err != nil {
+ return parserString{}, err
+ }
+
+ fullWord += "?"
+
+ if err := p.Consume(rfcparser.TokenTypeChar, "expected char after '?'"); err != nil {
+ return parserString{}, err
+ }
+
+ // Only the 'Q' and 'B' encodings are accepted, in either case.
+ encoding := rfcparser.ByteToLower(p.PreviousToken().Value)
+ if encoding != 'q' && encoding != 'b' {
+ return parserString{}, p.MakeError("encoding should either be 'Q' or 'B'")
+ }
+
+ if err := p.Consume(rfcparser.TokenTypeQuestion, "expected '?' after encoding byte"); err != nil {
+ return parserString{}, err
+ }
+
+ if encoding == 'b' {
+ fullWord += "B"
+ } else {
+ fullWord += "Q"
+ }
+
+ fullWord += "?"
+
+ encodedText, err := p.CollectBytesWhileMatchesWith(isEncodedText)
+ if err != nil {
+ return parserString{}, err
+ }
+
+ fullWord += encodedText.IntoString().Value
+
+ if err := p.ConsumeBytesFold('?', '='); err != nil {
+ return parserString{}, err
+ }
+
+ fullWord += "?="
+
+ if _, err := tryParseCFWS(p); err != nil {
+ return parserString{}, err
+ }
+
+ decoder := mime.WordDecoder{CharsetReader: CharsetReader}
+
+ decoded, err := decoder.Decode(fullWord)
+ if err != nil {
+ return parserString{}, p.MakeErrorAtOffset(fmt.Sprintf("failed to decode encoded atom: %v", err), startOffset)
+ }
+
+ return parserString{
+ String: rfcparser.String{Value: decoded, Offset: startOffset},
+ Type: parserStringTypeEncoded,
+ }, nil
+}
+
+// isEncodedAtomToken reports whether the token may appear in the charset part of an encoded-word.
+// Control characters, EOF/Error, space and the characters listed in the switch below are rejected;
+// everything else is accepted.
+func isEncodedAtomToken(tokenType rfcparser.TokenType) bool {
+	// token = 1*<Any CHAR except SPACE, CTLs, and especials>
+	//
+	// especials = "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "
+	//             <"> / "/" / "[" / "]" / "?" / "." / "="
+	if rfcparser.IsCTL(tokenType) {
+		return false
+	}
+
+	switch tokenType { //nolint:exhaustive
+	case rfcparser.TokenTypeEOF,
+		rfcparser.TokenTypeError,
+		rfcparser.TokenTypeSP,
+		rfcparser.TokenTypeLParen,
+		rfcparser.TokenTypeRParen,
+		rfcparser.TokenTypeLess,
+		rfcparser.TokenTypeGreater,
+		rfcparser.TokenTypeAt,
+		rfcparser.TokenTypeComma,
+		rfcparser.TokenTypeSemicolon,
+		rfcparser.TokenTypeColon,
+		rfcparser.TokenTypeDQuote,
+		rfcparser.TokenTypeSlash,
+		rfcparser.TokenTypeLBracket,
+		rfcparser.TokenTypeRBracket,
+		rfcparser.TokenTypeQuestion,
+		rfcparser.TokenTypePeriod,
+		rfcparser.TokenTypeEqual:
+		return false
+	}
+
+	return true
+}
+
+// isEncodedText reports whether the token may appear in the encoded-text part of an encoded-word.
+// Control characters, space, '?', EOF/Error and extended (non-ASCII) characters are rejected.
+func isEncodedText(tokenType rfcparser.TokenType) bool {
+	// encoded-text = 1*<Any printable ASCII character other than "?" or SPACE>
+	//                ; (but see "Use of encoded-words in message
+	//                ; headers", section 5)
+	if rfcparser.IsCTL(tokenType) {
+		return false
+	}
+
+	switch tokenType { //nolint:exhaustive
+	case rfcparser.TokenTypeSP,
+		rfcparser.TokenTypeQuestion,
+		rfcparser.TokenTypeEOF,
+		rfcparser.TokenTypeError,
+		rfcparser.TokenTypeExtendedChar:
+		return false
+	}
+
+	return true
+}
+
+// isAText reports whether the token is an `atext` character, i.e. one that may appear in an atom.
+// Extended (non-ASCII) characters are accepted as well, following RFC 6532.
+func isAText(tokenType rfcparser.TokenType) bool {
+	// atext = ALPHA / DIGIT /    ; Printable US-ASCII
+	//         "!" / "#" /        ; characters not including
+	//         "$" / "%" /        ; specials. Used for atoms.
+	//         "&" / "'" /
+	//         "*" / "+" /
+	//         "-" / "/" /
+	//         "=" / "?" /
+	//         "^" / "_" /
+	//         "`" / "{" /
+	//         "|" / "}" /
+	//         "~"
+	switch tokenType { //nolint:exhaustive
+	case rfcparser.TokenTypeDigit,
+		rfcparser.TokenTypeChar,
+		rfcparser.TokenTypeExclamation,
+		rfcparser.TokenTypeHash,
+		rfcparser.TokenTypeDollar,
+		rfcparser.TokenTypePercent,
+		rfcparser.TokenTypeAmpersand,
+		rfcparser.TokenTypeSQuote,
+		rfcparser.TokenTypeAsterisk,
+		rfcparser.TokenTypePlus,
+		rfcparser.TokenTypeMinus,
+		rfcparser.TokenTypeSlash,
+		rfcparser.TokenTypeEqual,
+		rfcparser.TokenTypeQuestion,
+		rfcparser.TokenTypeCaret,
+		rfcparser.TokenTypeUnderscore,
+		rfcparser.TokenTyeBacktick,
+		rfcparser.TokenTypeLCurly,
+		rfcparser.TokenTypeRCurly,
+		rfcparser.TokenTypePipe,
+		rfcparser.TokenTypeExtendedChar, // RFC6532
+		rfcparser.TokenTypeTilde:
+		return true
+	}
+
+	return false
+}
diff --git a/rfc5322/atom_test.go b/rfc5322/atom_test.go
new file mode 100644
index 00000000..8c5cbca7
--- /dev/null
+++ b/rfc5322/atom_test.go
@@ -0,0 +1,39 @@
+package rfc5322
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+// TestParseDotAtom checks that parseDotAtom returns the dot-atom text with surrounding folding
+// white space removed.
+func TestParseDotAtom(t *testing.T) {
+	cases := map[string]string{
+		"foobar.!#$%'*+-=?^~_{}`|/": "foobar.!#$%'*+-=?^~_{}`|/",
+		" f.b ":                     "f.b",
+		" \r\n f.b":                 "f.b",
+		" \r\n f.b \r\n ":           "f.b",
+	}
+
+	for input, expected := range cases {
+		parser := newTestRFCParser(input)
+
+		got, err := parseDotAtom(parser)
+		require.NoError(t, err)
+		require.Equal(t, expected, got.Value)
+	}
+}
+
+// TestParseAtom checks that parseAtom returns the atom text with surrounding folding white space
+// removed. It calls parseAtom itself; calling parseDotAtom here would leave parseAtom untested.
+func TestParseAtom(t *testing.T) {
+	inputs := map[string]string{
+		"foobar!#$%'*+-=?^~_{}`|/": "foobar!#$%'*+-=?^~_{}`|/",
+		" fb ":                     "fb",
+		" \r\n fb":                 "fb",
+		" \r\n fb \r\n ":           "fb",
+	}
+
+	for i, e := range inputs {
+		p := newTestRFCParser(i)
+		v, err := parseAtom(p)
+		require.NoError(t, err)
+		require.Equal(t, e, v.String.Value)
+	}
+}
diff --git a/rfc5322/backtracing_scanner.go b/rfc5322/backtracing_scanner.go
new file mode 100644
index 00000000..83dded87
--- /dev/null
+++ b/rfc5322/backtracing_scanner.go
@@ -0,0 +1,94 @@
+package rfc5322
+
+import (
+ "bytes"
+ "io"
+)
+
+// BacktrackingByteScanner reads sequentially from an in-memory byte slice and lets the caller
+// snapshot the read position and rewind to it later.
+type BacktrackingByteScanner struct {
+	data   []byte
+	offset int
+}
+
+// NewBacktrackingByteScanner returns a scanner positioned at the start of data. The slice is used
+// directly and is not copied.
+func NewBacktrackingByteScanner(data []byte) *BacktrackingByteScanner {
+	return &BacktrackingByteScanner{
+		data: data,
+	}
+}
+
+// BacktrackingByteScannerScope is a saved read position, created by SaveState and consumed by
+// RestoreState.
+type BacktrackingByteScannerScope struct {
+	offset int
+}
+
+// Read copies up to len(dst) unread bytes into dst, advances past them and returns how many bytes
+// were copied. It returns 0 and io.EOF once all data has been consumed.
+func (bs *BacktrackingByteScanner) Read(dst []byte) (int, error) {
+	if bs.offset >= len(bs.data) {
+		return 0, io.EOF
+	}
+
+	// copy caps the transfer at the shorter of dst and the remaining data. The offset has to advance
+	// on a final short read too, otherwise the tail would be returned again on every call.
+	n := copy(dst, bs.data[bs.offset:])
+	bs.offset += n
+
+	return n, nil
+}
+
+// ReadByte returns the next unread byte and advances past it, or io.EOF when no data is left.
+func (bs *BacktrackingByteScanner) ReadByte() (byte, error) {
+	if bs.offset >= len(bs.data) {
+		return 0, io.EOF
+	}
+
+	b := bs.data[bs.offset]
+
+	bs.offset++
+
+	return b, nil
+}
+
+// ReadBytes returns a copy of the unread data up to and including the first occurrence of delim and
+// advances past it. When delim does not occur, all remaining data is returned with a nil error.
+// io.EOF is returned only when no data is left at all.
+func (bs *BacktrackingByteScanner) ReadBytes(delim byte) ([]byte, error) {
+	if bs.offset >= len(bs.data) {
+		return nil, io.EOF
+	}
+
+	end := len(bs.data)
+	if index := bytes.IndexByte(bs.data[bs.offset:], delim); index >= 0 {
+		end = bs.offset + index + 1
+	}
+
+	// append allocates the destination; copying into a nil slice would transfer nothing.
+	result := append([]byte(nil), bs.data[bs.offset:end]...)
+	bs.offset = end
+
+	return result, nil
+}
+
+// SaveState captures the current read position.
+func (bs *BacktrackingByteScanner) SaveState() BacktrackingByteScannerScope {
+	return BacktrackingByteScannerScope{offset: bs.offset}
+}
+
+// RestoreState rewinds (or forwards) the scanner to a position captured by SaveState.
+func (bs *BacktrackingByteScanner) RestoreState(scope BacktrackingByteScannerScope) {
+	bs.offset = scope.offset
+}
diff --git a/rfc5322/cfws.go b/rfc5322/cfws.go
new file mode 100644
index 00000000..5caf5d3a
--- /dev/null
+++ b/rfc5322/cfws.go
@@ -0,0 +1,307 @@
+package rfc5322
+
+import "github.com/ProtonMail/gluon/rfcparser"
+
+// Section 3.2.2 White space and Comments
+
+// tryParseCFWS parses CFWS if the current token can start one (white space, CR or '('). The boolean
+// result reports whether parsing was attempted; the error is whatever parseCFWS returned.
+func tryParseCFWS(p *rfcparser.Parser) (bool, error) {
+	startsCFWS := func(tokenType rfcparser.TokenType) bool {
+		return isWSP(tokenType) || tokenType == rfcparser.TokenTypeCR || tokenType == rfcparser.TokenTypeLParen
+	}
+
+	if p.CheckWith(startsCFWS) {
+		return true, parseCFWS(p)
+	}
+
+	return false, nil
+}
+
+// parseCFWS parses comments and/or folding white space. It fails when the input contains neither.
+func parseCFWS(p *rfcparser.Parser) error {
+	// CFWS = (1*([FWS] comment) [FWS]) / FWS
+	sawLeadingFWS, err := tryParseFWS(p)
+	if err != nil {
+		return err
+	}
+
+	// Without a comment the production is valid only if some FWS was consumed.
+	if !p.Check(rfcparser.TokenTypeLParen) {
+		if sawLeadingFWS {
+			return nil
+		}
+
+		return p.MakeError("expected FWS or comment for CFWS")
+	}
+
+	// One or more comments, each optionally followed by FWS.
+	for p.Check(rfcparser.TokenTypeLParen) {
+		if err := parseComment(p); err != nil {
+			return err
+		}
+
+		if _, err := tryParseFWS(p); err != nil {
+			return err
+		}
+	}
+
+	_, err = tryParseFWS(p)
+
+	return err
+}
+
+// tryParseFWS parses folding white space if the current token can start it (white space or CR). The
+// boolean result reports whether parsing was attempted; the error is whatever parseFWS returned.
+func tryParseFWS(p *rfcparser.Parser) (bool, error) {
+	startsFWS := func(tokenType rfcparser.TokenType) bool {
+		return isWSP(tokenType) || tokenType == rfcparser.TokenTypeCR
+	}
+
+	if p.CheckWith(startsFWS) {
+		return true, parseFWS(p)
+	}
+
+	return false, nil
+}
+
+// parseFWS consumes folding white space: an optional run of WSP followed by any number of line
+// folds, where each fold is a newline that must be followed by at least one WSP.
+func parseFWS(p *rfcparser.Parser) error {
+	// FWS     = ([*WSP CRLF] 1*WSP) / obs-FWS
+	//           ; Folding white space
+	// obs-FWS = 1*WSP *(CRLF 1*WSP)
+
+	// skipWSP consumes zero or more WSP tokens.
+	skipWSP := func() error {
+		for {
+			matched, err := p.MatchesWith(isWSP)
+			if err != nil {
+				return err
+			}
+
+			if !matched {
+				return nil
+			}
+		}
+	}
+
+	if err := skipWSP(); err != nil {
+		return err
+	}
+
+	// Every CR starts a fold; this covers the single fold of FWS and the repeated folds of obs-FWS.
+	for p.Check(rfcparser.TokenTypeCR) {
+		if err := p.ConsumeNewLine(); err != nil {
+			return err
+		}
+
+		// Parse one or many WSP.
+		if err := p.ConsumeWith(isWSP, "expected WSP after CRLF"); err != nil {
+			return err
+		}
+
+		if err := skipWSP(); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// parseCContent consumes one element of comment content: a single ctext token, a quoted-pair, or a
+// nested comment. Anything else is an error.
+func parseCContent(p *rfcparser.Parser) error {
+	isText, err := p.MatchesWith(isCText)
+	if err != nil {
+		return err
+	}
+
+	if isText {
+		return nil
+	}
+
+	_, isPair, err := tryParseQuotedPair(p)
+	if err != nil {
+		return err
+	}
+
+	if isPair {
+		return nil
+	}
+
+	if !p.Check(rfcparser.TokenTypeLParen) {
+		return p.MakeError("unexpected ccontent token")
+	}
+
+	return parseComment(p)
+}
+
+// parseComment consumes a parenthesised comment, including any nested comments, quoted-pairs and
+// folding white space inside it. The comment text is discarded.
+func parseComment(p *rfcparser.Parser) error {
+ if err := p.Consume(rfcparser.TokenTypeLParen, "expected ( for comment start"); err != nil {
+ return err
+ }
+
+ // *([FWS] ccontent)
+ for {
+ if _, err := tryParseFWS(p); err != nil {
+ return err
+ }
+
+ // Stop as soon as the next token cannot start a ccontent (ctext, quoted-pair or nested comment).
+ if !p.CheckWith(func(tokenType rfcparser.TokenType) bool {
+ return isCText(tokenType) || tokenType == rfcparser.TokenTypeBackslash || tokenType == rfcparser.TokenTypeLParen
+ }) {
+ break
+ }
+
+ if err := parseCContent(p); err != nil {
+ return err
+ }
+ }
+
+ if _, err := tryParseFWS(p); err != nil {
+ return err
+ }
+
+ if err := p.Consume(rfcparser.TokenTypeRParen, "expected ) for comment end"); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// tryParseQuotedPair parses a quoted-pair if the current token is a backslash. It returns the
+// escaped byte and true on success, or (0, false, nil) when there is no backslash.
+func tryParseQuotedPair(p *rfcparser.Parser) (byte, bool, error) {
+	if p.Check(rfcparser.TokenTypeBackslash) {
+		escaped, err := parseQuotedPair(p)
+		if err != nil {
+			return 0, false, err
+		}
+
+		return escaped, true, nil
+	}
+
+	return 0, false, nil
+}
+
+// parseQuotedPair consumes a backslash followed by one escaped token and returns that token's byte
+// value. The escaped token may be a VCHAR, a WSP, or one of the obsolete forms (obs-NO-WS-CTL, CR,
+// LF or the zero token).
+func parseQuotedPair(p *rfcparser.Parser) (byte, error) {
+	// quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
+	//
+	// obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
+	if err := p.Consume(rfcparser.TokenTypeBackslash, "expected \\ for quoted pair start"); err != nil {
+		return 0, err
+	}
+
+	isObsQP := func(tokenType rfcparser.TokenType) bool {
+		return isObsNoWSCTL(tokenType) ||
+			tokenType == rfcparser.TokenTypeCR ||
+			tokenType == rfcparser.TokenTypeLF ||
+			tokenType == rfcparser.TokenTypeZero
+	}
+
+	// The alternatives are tried in the order given by the grammar; the first match wins.
+	for _, accepts := range []func(rfcparser.TokenType) bool{isVChar, isWSP, isObsQP} {
+		matched, err := p.MatchesWith(accepts)
+		if err != nil {
+			return 0, err
+		}
+
+		if matched {
+			return p.PreviousToken().Value, nil
+		}
+	}
+
+	return 0, p.MakeError("unexpected character for quoted pair")
+}
+
+// isWSP reports whether the token is white space, i.e. a space or a tab.
+func isWSP(tokenType rfcparser.TokenType) bool {
+	switch tokenType { //nolint:exhaustive
+	case rfcparser.TokenTypeSP, rfcparser.TokenTypeTab:
+		return true
+	}
+
+	return false
+}
+
+// isCText reports whether the token may appear as plain text inside a comment. Parentheses,
+// backslash, white space, CR, LF and the EOF/Error tokens are rejected; every other token is
+// accepted.
+func isCText(tokenType rfcparser.TokenType) bool {
+	// ctext = %d33-39 /   ; Printable US-ASCII
+	//         %d42-91 /   ; characters not including
+	//         %d93-126 /  ; "(", ")", or "\"
+	//         obs-ctext
+	//
+	// obs-NO-WS-CTL = %d1-8 /   ; US-ASCII control
+	//                 %d11 /    ; characters that do not
+	//                 %d12 /    ; include the carriage
+	//                 %d14-31 / ; return, line feed, and
+	//                 %d127     ; white space characters
+	//
+	// obs-ctext = obs-NO-WS-CTL
+	switch tokenType { // nolint:exhaustive
+	case rfcparser.TokenTypeEOF,
+		rfcparser.TokenTypeError,
+		rfcparser.TokenTypeLParen,
+		rfcparser.TokenTypeRParen,
+		rfcparser.TokenTypeCR,
+		rfcparser.TokenTypeTab,
+		rfcparser.TokenTypeLF,
+		rfcparser.TokenTypeSP,
+		rfcparser.TokenTypeBackslash:
+		return false
+	}
+
+	return true
+}
+
+// isObsNoWSCTL reports whether the token is an obsolete non-white-space control character: a control
+// token or the delete token, excluding CR, LF, tab and space. EOF and Error tokens are never
+// accepted.
+func isObsNoWSCTL(tokenType rfcparser.TokenType) bool {
+	// obs-NO-WS-CTL = %d1-8 /   ; US-ASCII control
+	//                 %d11 /    ; characters that do not
+	//                 %d12 /    ; include the carriage
+	//                 %d14-31 / ; return, line feed, and
+	//                 %d127     ; white space characters
+	switch tokenType { // nolint:exhaustive
+	case rfcparser.TokenTypeEOF,
+		rfcparser.TokenTypeError,
+		rfcparser.TokenTypeCR,
+		rfcparser.TokenTypeTab,
+		rfcparser.TokenTypeLF,
+		rfcparser.TokenTypeSP:
+		return false
+	}
+
+	return rfcparser.IsCTL(tokenType) || tokenType == rfcparser.TokenTypeDelete
+}
+
+// isVChar reports whether the token is a visible character. Control tokens, the delete token and
+// the EOF/Error tokens are rejected; every other token is accepted.
+func isVChar(tokenType rfcparser.TokenType) bool {
+	// VChar %x21-7E
+	if rfcparser.IsCTL(tokenType) {
+		return false
+	}
+
+	switch tokenType { //nolint:exhaustive
+	case rfcparser.TokenTypeDelete,
+		rfcparser.TokenTypeError,
+		rfcparser.TokenTypeEOF:
+		return false
+	}
+
+	return true
+}
diff --git a/rfc5322/cfws_test.go b/rfc5322/cfws_test.go
new file mode 100644
index 00000000..3f991dc6
--- /dev/null
+++ b/rfc5322/cfws_test.go
@@ -0,0 +1,58 @@
+package rfc5322
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+// TestParseFWS checks that parseFWS accepts every folding white space sample
+// listed below without returning an error.
+func TestParseFWS(t *testing.T) {
+	samples := []string{
+		" \t ",
+		"\r\n\t",
+		" \r\n\t",
+		" \r\n \r\n \r\n\t",
+		" \t\r\n ",
+	}
+
+	for idx := range samples {
+		parser := newTestRFCParser(samples[idx])
+		require.NoError(t, parseFWS(parser))
+	}
+}
+
+// TestParserComment checks that parseComment accepts plain, padded, folded,
+// nested and escaped comments as well as one holding the raw bytes 0x7F and 0x8.
+func TestParserComment(t *testing.T) {
+	samples := []string{
+		"(my comment here)",
+		"(my comment here )",
+		"( my comment here)",
+		"( my comment here )",
+		"(my\r\n comment here)",
+		"(my\r\n (comment) here)",
+		"(\\my\r\n (comment) here)",
+		"(" + string([]byte{0x7F, 0x8}) + ")",
+	}
+
+	for idx := range samples {
+		parser := newTestRFCParser(samples[idx])
+		require.NoError(t, parseComment(parser))
+	}
+}
+
+// TestParserCFWS checks that parseCFWS accepts white space, comments and
+// mixtures of both without returning an error.
+func TestParserCFWS(t *testing.T) {
+	samples := []string{
+		" ",
+		"(my comment here)",
+		" (my comment here) ",
+		" \r\n (my comment here) ",
+		" \r\n \r\n (my comment here) \r\n ",
+	}
+
+	for idx := range samples {
+		parser := newTestRFCParser(samples[idx])
+		require.NoError(t, parseCFWS(parser))
+	}
+}
diff --git a/rfc5322/miscelleaneous.go b/rfc5322/miscelleaneous.go
new file mode 100644
index 00000000..18b80552
--- /dev/null
+++ b/rfc5322/miscelleaneous.go
@@ -0,0 +1,80 @@
+package rfc5322
+
+import (
+ "github.com/ProtonMail/gluon/rfcparser"
+)
+
+// 3.2.5. Miscellaneous Tokens
+
+// parseWord parses one element of a phrase.
+//
+// Optional leading CFWS is consumed first. The current token then selects the
+// rule: '=' is handed to parseEncodedAtom, a double quote to parseQuotedString
+// and anything else to parseAtom.
+func parseWord(p *rfcparser.Parser) (parserString, error) {
+	// word            =   atom / quoted-string
+	if _, err := tryParseCFWS(p); err != nil {
+		return parserString{}, err
+	}
+
+	switch {
+	case p.Check(rfcparser.TokenTypeEqual):
+		return parseEncodedAtom(p)
+	case p.Check(rfcparser.TokenTypeDQuote):
+		return parseQuotedString(p)
+	}
+
+	atom, err := parseAtom(p)
+	if err != nil {
+		return parserString{}, err
+	}
+
+	return atom, nil
+}
+
+func parsePhrase(p *rfcparser.Parser) ([]parserString, error) {
+ // nolint:dupword
+ // phrase = 1*word / obs-phrase
+ // obs-phrase = word *(word / "." / CFWS)
+ // This version has been extended to allow '@' to appear in obs-phrase
+ word, err := parseWord(p)
+ if err != nil {
+ return nil, err
+ }
+
+ var result = []parserString{word}
+
+ isSep := func(tokenType rfcparser.TokenType) bool {
+ return tokenType == rfcparser.TokenTypePeriod || tokenType == rfcparser.TokenTypeAt
+ }
+
+ for {
+ // check period case
+ if ok, err := p.MatchesWith(isSep); err != nil {
+ return nil, err
+ } else if ok {
+ prevToken := p.PreviousToken()
+ result = append(result, parserString{
+ String: rfcparser.String{
+ Value: string(prevToken.Value),
+ Offset: prevToken.Offset,
+ },
+ Type: parserStringTypeUnspaced,
+ })
+ continue
+ }
+
+ if _, err := tryParseCFWS(p); err != nil {
+ return nil, err
+ }
+
+ if !(p.CheckWith(isAText) || p.Check(rfcparser.TokenTypeDQuote)) {
+ break
+ }
+
+ nextWord, err := parseWord(p)
+ if err != nil {
+ return nil, err
+ }
+
+ result = append(result, nextWord)
+ }
+
+ return result, nil
+}
diff --git a/rfc5322/miscelleaneous_test.go b/rfc5322/miscelleaneous_test.go
new file mode 100644
index 00000000..8248c4c9
--- /dev/null
+++ b/rfc5322/miscelleaneous_test.go
@@ -0,0 +1,41 @@
+package rfc5322
+
+import (
+ "testing"
+
+ "github.com/bradenaw/juniper/xslices"
+ "github.com/stretchr/testify/require"
+)
+
+// TestParseWord maps raw inputs to the value parseWord is expected to return
+// for them (quoted strings with escapes and folding, and a bare atom).
+func TestParseWord(t *testing.T) {
+	cases := map[string]string{
+		`"f\".c"`:                   "f\".c",
+		"\" \r\n f\\\".c\r\n \"": " f\".c ",
+		` " foo bar derer " `:       " foo bar derer ",
+		`foo`:                       "foo",
+	}
+
+	for input, want := range cases {
+		got, err := parseWord(newTestRFCParser(input))
+		require.NoError(t, err)
+		require.Equal(t, want, got.String.Value)
+	}
+}
+
+// TestParsePhrase maps raw inputs to the sequence of part values parsePhrase
+// is expected to produce, including "." separators as parts of their own.
+func TestParsePhrase(t *testing.T) {
+	cases := map[string][]string{
+		`foo "quoted"`:     {"foo", "quoted"},
+		`"f\".c" "quoted"`: {"f\".c", "quoted"},
+		`foo bar`:          {"foo", "bar"},
+		`foo.bar`:          {"foo", ".", "bar"},
+		`foo . bar`:        {"foo", ".", "bar"},
+	}
+
+	for input, want := range cases {
+		parts, err := parsePhrase(newTestRFCParser(input))
+		require.NoError(t, err)
+
+		got := xslices.Map(parts, func(part parserString) string { return part.String.Value })
+		require.Equal(t, want, got)
+	}
+}
diff --git a/rfc5322/parser.go b/rfc5322/parser.go
new file mode 100644
index 00000000..250dde14
--- /dev/null
+++ b/rfc5322/parser.go
@@ -0,0 +1,83 @@
+package rfc5322
+
+import (
+ "net/mail"
+
+ "github.com/ProtonMail/gluon/rfcparser"
+)
+// Parser bundles the token parser with the backtracking byte source it reads from, so that both can be saved and restored together (see SaveState and RestoreState).
+type Parser struct {
+ source *BacktrackingByteScanner // byte source; SaveState and RestoreState save and restore its state
+ scanner *rfcparser.Scanner // token scanner reading from source
+ parser *rfcparser.Parser // token-level parser fed by scanner
+}
+// parserStringType tags a parserString with the kind of input it was built from.
+type parserStringType int
+
+const (
+ parserStringTypeOther parserStringType = iota // zero value; parseQuotedString results carry it
+ parserStringTypeUnspaced // used by parsePhrase for the "." and "@" separators it records
+ parserStringTypeEncoded // presumably marks encoded-word atoms (see parseEncodedAtom) - TODO confirm
+)
+// parserString is a parsed string fragment together with a tag describing what kind of input produced it.
+type parserString struct {
+ String rfcparser.String // the text and its offset in the input
+ Type parserStringType // kind tag, see the parserStringType constants
+}
+
+// ParseAddress scans input and applies the single-address rule (parseAddress)
+// to it.
+//
+// It returns the addresses produced by that rule; the rule's second return
+// value is discarded. An error is returned when the first token cannot be
+// read or when parseAddress fails.
+func ParseAddress(input string) ([]*mail.Address, error) {
+	byteSource := NewBacktrackingByteScanner([]byte(input))
+	tokenScanner := rfcparser.NewScannerWithReader(byteSource)
+
+	state := &Parser{
+		source:  byteSource,
+		scanner: tokenScanner,
+		parser:  rfcparser.NewParser(tokenScanner),
+	}
+
+	// Load the first token before handing over to the grammar rules.
+	if err := state.parser.Advance(); err != nil {
+		return nil, err
+	}
+
+	addrs, _, err := parseAddress(state)
+
+	return addrs, err
+}
+
+// ParseAddressList scans input and applies the address-list rule
+// (parseAddressList) to it, returning whatever that rule returns.
+//
+// An error is also returned when the first token cannot be read.
+func ParseAddressList(input string) ([]*mail.Address, error) {
+	byteSource := NewBacktrackingByteScanner([]byte(input))
+	tokenScanner := rfcparser.NewScannerWithReader(byteSource)
+
+	state := &Parser{
+		source:  byteSource,
+		scanner: tokenScanner,
+		parser:  rfcparser.NewParser(tokenScanner),
+	}
+
+	// Load the first token before handing over to the grammar rules.
+	if err := state.parser.Advance(); err != nil {
+		return nil, err
+	}
+
+	return parseAddressList(state)
+}
+// ParserState is the snapshot produced by Parser.SaveState and consumed by Parser.RestoreState.
+type ParserState struct {
+ scanner BacktrackingByteScannerScope // saved state of the byte source
+ parser rfcparser.ParserState // saved state of the token parser
+}
+
+// SaveState captures the state of the byte source first and of the token
+// parser second, and returns both as one ParserState for a later RestoreState.
+func (p *Parser) SaveState() ParserState {
+	return ParserState{
+		scanner: p.source.SaveState(),
+		parser:  p.parser.SaveState(),
+	}
+}
+// RestoreState hands the two halves of a snapshot taken by SaveState back to the byte source and to the token parser.
+func (p *Parser) RestoreState(s ParserState) {
+ p.source.RestoreState(s.scanner) // byte source first, then the token parser
+ p.parser.RestoreState(s.parser)
+}
diff --git a/rfc5322/parser_test.go b/rfc5322/parser_test.go
new file mode 100644
index 00000000..a74554e5
--- /dev/null
+++ b/rfc5322/parser_test.go
@@ -0,0 +1,832 @@
+package rfc5322
+
+import (
+ "bytes"
+ "net/mail"
+ "testing"
+
+ "github.com/ProtonMail/gluon/rfcparser"
+ "github.com/stretchr/testify/assert"
+)
+
+// newTestRFCParser returns an rfcparser.Parser over s with its first token
+// already loaded. It panics when that first Advance fails.
+func newTestRFCParser(s string) *rfcparser.Parser {
+	scanner := rfcparser.NewScanner(bytes.NewReader([]byte(s)))
+	parser := rfcparser.NewParser(scanner)
+
+	if err := parser.Advance(); err != nil {
+		panic("failed to advance parser")
+	}
+
+	return parser
+}
+
+// TestParseAddress feeds every table entry to ParseAddress in its own subtest
+// and asserts that no error occurs and that the result holds exactly the
+// expected addresses (compared with assert.ElementsMatch, i.e. ignoring order).
+//
+// NOTE(review): many inputs in this copy carry no angle-bracketed address (for
+// example `John Doe ` or the empty input) although a non-empty Address is
+// expected; the `<...>` parts look stripped - confirm against the upstream file.
+func TestParseAddress(t *testing.T) {
+	cases := []struct {
+		in   string
+		want []*mail.Address
+	}{
+		{in: `user@example.com`, want: []*mail.Address{{Address: `user@example.com`}}},
+		{in: `John Doe `, want: []*mail.Address{{Name: `John Doe`, Address: `jdoe@machine.example`}}},
+		{in: `Mary Smith `, want: []*mail.Address{{Name: `Mary Smith`, Address: `mary@example.net`}}},
+		{in: `"Joe Q. Public" `, want: []*mail.Address{{Name: `Joe Q. Public`, Address: `john.q.public@example.com`}}},
+		{in: `Mary Smith `, want: []*mail.Address{{Name: `Mary Smith`, Address: `mary@x.test`}}},
+		{in: `jdoe@example.org`, want: []*mail.Address{{Address: `jdoe@example.org`}}},
+		{in: `Who? `, want: []*mail.Address{{Name: `Who?`, Address: `one@y.test`}}},
+		{in: ``, want: []*mail.Address{{Address: `boss@nil.test`}}},
+		{in: `"Giant; \"Big\" Box" `, want: []*mail.Address{{Name: `Giant; "Big" Box`, Address: `sysservices@example.net`}}},
+		{in: `Pete `, want: []*mail.Address{{Name: `Pete`, Address: `pete@silly.example`}}},
+		{in: `"Mary Smith: Personal Account" `, want: []*mail.Address{{Name: `Mary Smith: Personal Account`, Address: `smith@home.example`}}},
+		{in: `Pete(A nice \) chap) `, want: []*mail.Address{{Name: `Pete`, Address: `pete@silly.test`}}},
+		{in: `Gogh Fir `, want: []*mail.Address{{Name: `Gogh Fir`, Address: `gf@example.com`}}},
+		{in: `normal name `, want: []*mail.Address{{Name: `normal name`, Address: `username@server.com`}}},
+		{in: `"comma, name" `, want: []*mail.Address{{Name: `comma, name`, Address: `username@server.com`}}},
+		{in: `name (ignore comment)`, want: []*mail.Address{{Name: `name`, Address: `username@server.com`}}},
+		{in: `"Mail Robot" <>`, want: []*mail.Address{{Name: `Mail Robot`}}},
+		// Not his real address.
+		{in: `Michal Hořejšek `, want: []*mail.Address{{Name: `Michal Hořejšek`, Address: `hořejšek@mail.com`}}},
+		{in: `First Last `, want: []*mail.Address{{Name: `First Last`, Address: `user@domain.com`}}},
+		{in: `First Last `, want: []*mail.Address{{Name: `First Last`, Address: `user@domain.com.`}}},
+		{in: `First Last `, want: []*mail.Address{{Name: `First Last`, Address: `user@domain.com.`}}},
+		{in: `First Last `, want: []*mail.Address{{Name: `First Last`, Address: `user@domain.com:25`}}},
+		{in: `First Last `, want: []*mail.Address{{Name: `First Last`, Address: `user@[10.0.0.1]`}}},
+		{in: ``, want: []*mail.Address{{Address: `postmaster@[10.10.10.10]`}}},
+		{in: `First Last < user@domain.com>`, want: []*mail.Address{{Name: `First Last`, Address: `user@domain.com`}}},
+		{in: `user@domain.com,`, want: []*mail.Address{{Address: `user@domain.com`}}},
+		{in: `First Middle "Last" `, want: []*mail.Address{{Name: `First Middle Last`, Address: `user@domain.com`}}},
+		{in: `First Middle Last `, want: []*mail.Address{{Name: `First Middle Last`, Address: `user@domain.com`}}},
+		{in: `First Middle"Last" `, want: []*mail.Address{{Name: `First Middle Last`, Address: `user@domain.com`}}},
+		{in: `First Middle "Last"`, want: []*mail.Address{{Name: `First Middle Last`, Address: `user@domain.com`}}},
+		{in: `First "Middle" "Last" `, want: []*mail.Address{{Name: `First Middle Last`, Address: `user@domain.com`}}},
+		{in: `First "Middle""Last" `, want: []*mail.Address{{Name: `First Middle Last`, Address: `user@domain.com`}}},
+		{in: `first.last `, want: []*mail.Address{{Name: `first.last`, Address: `user@domain.com`}}},
+		{in: `first . last `, want: []*mail.Address{{Name: `first.last`, Address: `user@domain.com`}}},
+	}
+
+	for _, tc := range cases {
+		// Per-iteration copy for the subtest closure.
+		tc := tc
+
+		t.Run(tc.in, func(t *testing.T) {
+			got, err := ParseAddress(tc.in)
+			assert.NoError(t, err)
+			assert.ElementsMatch(t, tc.want, got)
+		})
+	}
+}
+
+// TestParseAddressList feeds every table entry to ParseAddressList in its own
+// subtest and asserts that no error occurs and that the result holds exactly
+// the expected addresses (compared with assert.ElementsMatch, i.e. ignoring
+// order). The table covers "," and ";" separated lists, comments, a group
+// inside a list and display names containing '@'.
+//
+// NOTE(review): many inputs in this copy carry no angle-bracketed address
+// (for example `Alice , Bob , Eve `) although non-empty Addresses are
+// expected; the `<...>` parts look stripped - confirm against the upstream file.
+func TestParseAddressList(t *testing.T) {
+	cases := []struct {
+		in   string
+		want []*mail.Address
+	}{
+		{
+			in: `Alice , Bob , Eve `,
+			want: []*mail.Address{
+				{Name: `Alice`, Address: `alice@example.com`},
+				{Name: `Bob`, Address: `bob@example.com`},
+				{Name: `Eve`, Address: `eve@example.com`},
+			},
+		},
+		{
+			in: `Alice ; Bob ; Eve `,
+			want: []*mail.Address{
+				{Name: `Alice`, Address: `alice@example.com`},
+				{Name: `Bob`, Address: `bob@example.com`},
+				{Name: `Eve`, Address: `eve@example.com`},
+			},
+		},
+		{
+			in: `Ed Jones ,joe@where.test,John `,
+			want: []*mail.Address{
+				{Name: `Ed Jones`, Address: `c@a.test`},
+				{Address: `joe@where.test`},
+				{Name: `John`, Address: `jdoe@one.test`},
+			},
+		},
+		{
+			in: `name (ignore comment) , (Comment as name) username2@server.com`,
+			want: []*mail.Address{
+				{Name: `name`, Address: `username@server.com`},
+				{Address: `username2@server.com`},
+			},
+		},
+		{
+			in: `"normal name" , "comma, name" `,
+			want: []*mail.Address{
+				{Name: `normal name`, Address: `username@server.com`},
+				{Name: `comma, name`, Address: `address@server.com`},
+			},
+		},
+		{
+			in: `"comma, one" , "comma, two" `,
+			want: []*mail.Address{
+				{Name: `comma, one`, Address: `username@server.com`},
+				{Name: `comma, two`, Address: `address@server.com`},
+			},
+		},
+		{
+			in: `normal name , (comment)All.(around)address@(the)server.com`,
+			want: []*mail.Address{
+				{Name: `normal name`, Address: `username@server.com`},
+				{Address: `All.address@server.com`},
+			},
+		},
+		{
+			in: `normal name , All.("comma, in comment")address@(the)server.com`,
+			want: []*mail.Address{
+				{Name: `normal name`, Address: `username@server.com`},
+				{Address: `All.address@server.com`},
+			},
+		},
+		{
+			in: `Alice , Group:foo@bar;, bar@bar`,
+			want: []*mail.Address{
+				{Name: `Alice`, Address: `alice@example.com`},
+				{Name: ``, Address: `foo@bar`},
+				{Name: ``, Address: `bar@bar`},
+			},
+		},
+		{
+			in:   `user@domain `,
+			want: []*mail.Address{{Name: `user@domain`, Address: `user@domain.com`}},
+		},
+		{
+			in:   `user @ domain `,
+			want: []*mail.Address{{Name: `user@domain`, Address: `user@domain.com`}},
+		},
+	}
+
+	for _, tc := range cases {
+		// Per-iteration copy for the subtest closure.
+		tc := tc
+
+		t.Run(tc.in, func(t *testing.T) {
+			got, err := ParseAddressList(tc.in)
+			assert.NoError(t, err)
+			assert.ElementsMatch(t, tc.want, got)
+		})
+	}
+}
+
+// TestParseGroup feeds group syntax ("name: members ;") to ParseAddressList,
+// one subtest per entry, and asserts that no error occurs and that exactly
+// the expected member addresses are returned (assert.ElementsMatch, i.e.
+// ignoring order). Empty groups are expected to contribute no addresses.
+//
+// NOTE(review): the first input carries no angle-bracketed addresses although
+// named Addresses are expected; the `<...>` parts look stripped in this copy -
+// confirm against the upstream file.
+func TestParseGroup(t *testing.T) {
+	cases := []struct {
+		in   string
+		want []*mail.Address
+	}{
+		{
+			in: `A Group:Ed Jones ,joe@where.test,John ;`,
+			want: []*mail.Address{
+				{Name: `Ed Jones`, Address: `c@a.test`},
+				{Address: `joe@where.test`},
+				{Name: `John`, Address: `jdoe@one.test`},
+			},
+		},
+		{in: `undisclosed recipients:;`, want: []*mail.Address{}},
+		// We permit the group to not end in a semicolon, although as per RFC5322 it really should.
+		{in: `undisclosed recipients:`, want: []*mail.Address{}},
+		// We permit the group to be surrounded with quotes, although as per RFC5322 it really shouldn't be.
+		{in: `"undisclosed recipients:"`, want: []*mail.Address{}},
+		// We permit the group to be surrounded with quotes, although as per RFC5322 it really shouldn't be.
+		{in: `"undisclosed recipients:;"`, want: []*mail.Address{}},
+		{in: `undisclosed recipients:, foo@bar`, want: []*mail.Address{{Address: `foo@bar`}}},
+		{in: `undisclosed recipients:;, foo@bar`, want: []*mail.Address{{Address: `foo@bar`}}},
+		{
+			in:   `undisclosed recipients:bar@bar;, foo@bar`,
+			want: []*mail.Address{{Address: `bar@bar`}, {Address: `foo@bar`}},
+		},
+		{in: `"undisclosed recipients:", foo@bar`, want: []*mail.Address{{Address: `foo@bar`}}},
+		{in: `(Empty list)(start)Hidden recipients :(nobody(that I know)) ;`, want: []*mail.Address{}},
+		{
+			in:   `foo@bar, g:bar@bar; z@z`,
+			want: []*mail.Address{{Address: `foo@bar`}, {Address: `bar@bar`}, {Address: `z@z`}},
+		},
+		{
+			in:   `foo@bar, g:bar@bar;; z@z`,
+			want: []*mail.Address{{Address: `foo@bar`}, {Address: `bar@bar`}, {Address: `z@z`}},
+		},
+		{
+			in:   `foo@bar, g:bar@bar;, z@z`,
+			want: []*mail.Address{{Address: `foo@bar`}, {Address: `bar@bar`}, {Address: `z@z`}},
+		},
+		{in: `foo@bar, g:; z@z`, want: []*mail.Address{{Address: `foo@bar`}, {Address: `z@z`}}},
+		{in: `foo@bar, g:;; z@z`, want: []*mail.Address{{Address: `foo@bar`}, {Address: `z@z`}}},
+		{in: `foo@bar, g:;, z@z`, want: []*mail.Address{{Address: `foo@bar`}, {Address: `z@z`}}},
+		{in: `foo@bar, "g:;", z@z`, want: []*mail.Address{{Address: `foo@bar`}, {Address: `z@z`}}},
+	}
+
+	for _, tc := range cases {
+		// Per-iteration copy for the subtest closure.
+		tc := tc
+
+		t.Run(tc.in, func(t *testing.T) {
+			got, err := ParseAddressList(tc.in)
+			assert.NoError(t, err)
+			assert.ElementsMatch(t, tc.want, got)
+		})
+	}
+}
+
+// TestParseSingleAddressEncodedWord feeds inputs whose display names contain
+// "=?charset?encoding?text?=" encoded words to ParseAddressList, one subtest
+// per entry, and asserts that no error occurs and that the expected decoded
+// names and addresses are returned (assert.ElementsMatch, i.e. ignoring order).
+//
+// NOTE(review): most inputs in this copy carry no angle-bracketed address
+// although a non-empty Address is expected; the `<...>` parts look stripped -
+// confirm against the upstream file.
+func TestParseSingleAddressEncodedWord(t *testing.T) {
+	cases := []struct {
+		in   string
+		want []*mail.Address
+	}{
+		{
+			in:   `=?US-ASCII?Q?Keith_Moore?= `,
+			want: []*mail.Address{{Name: `Keith Moore`, Address: `moore@cs.utk.edu`}},
+		},
+		{
+			in:   `=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= `,
+			want: []*mail.Address{{Name: `Keld Jørn Simonsen`, Address: `keld@dkuug.dk`}},
+		},
+		{
+			in:   `=?ISO-8859-1?Q?Andr=E9?= Pirard `,
+			want: []*mail.Address{{Name: `André Pirard`, Address: `PIRARD@vm1.ulg.ac.be`}},
+		},
+		{
+			in:   `=?ISO-8859-1?Q?Olle_J=E4rnefors?= `,
+			want: []*mail.Address{{Name: `Olle Järnefors`, Address: `ojarnef@admin.kth.se`}},
+		},
+		{
+			in:   `=?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?= `,
+			want: []*mail.Address{{Name: `Patrik Fältström`, Address: `paf@nada.kth.se`}},
+		},
+		{
+			in:   `Nathaniel Borenstein (=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=)`,
+			want: []*mail.Address{{Name: `Nathaniel Borenstein`, Address: `nsb@thumper.bellcore.com`}},
+		},
+		{
+			in:   `=?UTF-8?B?PEJlemUgam3DqW5hPg==?= `,
+			want: []*mail.Address{{Name: ``, Address: `user@domain.com`}},
+		},
+		{
+			in:   `First Middle =?utf-8?Q?Last?= `,
+			want: []*mail.Address{{Name: `First Middle Last`, Address: `user@domain.com`}},
+		},
+		{
+			in:   `First Middle=?utf-8?Q?Last?= `,
+			want: []*mail.Address{{Name: `First Middle=?utf-8?Q?Last?=`, Address: `user@domain.com`}},
+		},
+		{
+			in:   `First Middle =?utf-8?Q?Last?=`,
+			want: []*mail.Address{{Name: `First Middle Last`, Address: `user@domain.com`}},
+		},
+		{
+			in:   `First =?utf-8?Q?Middle?= =?utf-8?Q?Last?= `,
+			want: []*mail.Address{{Name: `First MiddleLast`, Address: `user@domain.com`}},
+		},
+		{
+			in:   `First =?utf-8?Q?Middle?==?utf-8?Q?Last?= `,
+			want: []*mail.Address{{Name: `First MiddleLast`, Address: `user@domain.com`}},
+		},
+		{
+			in:   `First "Middle"=?utf-8?Q?Last?= `,
+			want: []*mail.Address{{Name: `First Middle Last`, Address: `user@domain.com`}},
+		},
+		{
+			in:   `First "Middle" =?utf-8?Q?Last?= `,
+			want: []*mail.Address{{Name: `First Middle Last`, Address: `user@domain.com`}},
+		},
+		{
+			in:   `First "Middle" =?utf-8?Q?Last?=`,
+			want: []*mail.Address{{Name: `First Middle Last`, Address: `user@domain.com`}},
+		},
+		{
+			in:   `=?UTF-8?B?PEJlemUgam3DqW5hPg==?= `,
+			want: []*mail.Address{{Name: ``, Address: `user@domain.com`}},
+		},
+	}
+
+	for _, tc := range cases {
+		// Per-iteration copy for the subtest closure.
+		tc := tc
+
+		t.Run(tc.in, func(t *testing.T) {
+			got, err := ParseAddressList(tc.in)
+			assert.NoError(t, err)
+			assert.ElementsMatch(t, tc.want, got)
+		})
+	}
+}
+
+// TestParseAddressInvalid checks, one subtest per input, that
+// ParseAddressList rejects the input and that the returned error is
+// recognised by rfcparser.IsError.
+func TestParseAddressInvalid(t *testing.T) {
+	for _, input := range []string{
+		`user@domain...com`,
+		`"comma, name" , another, name `,
+		`username`,
+		`=?ISO-8859-2?Q?First_Last?= , `,
+		`=?windows-1250?Q?Spr=E1vce_syst=E9mu?=`,
+		`"'user@domain.com.'"`,
+		``,
+		`"Mail Delivery System <>" <@>`,
+	} {
+		// Per-iteration copy for the subtest closure.
+		input := input
+
+		t.Run(input, func(t *testing.T) {
+			_, parseErr := ParseAddressList(input)
+			assert.Error(t, parseErr)
+			assert.True(t, rfcparser.IsError(parseErr))
+		})
+	}
+}
+
+func TestParseAddressListEmoji(t *testing.T) {
+ input := `=?utf-8?q?Goce_Test_=F0=9F=A4=A6=F0=9F=8F=BB=E2=99=82=F0=9F=99=88?= =?utf-8?q?=F0=9F=8C=B2=E2=98=98=F0=9F=8C=B4?= , "Proton GMX Edit" , "beta@bar.com" , "testios12" , "random@bar.com" , =?utf-8?q?=C3=9C=C3=A4=C3=B6_Jakdij?= , =?utf-8?q?Q=C3=A4_T=C3=B6=C3=BCst_12_Edit?= , =?utf-8?q?=E2=98=98=EF=B8=8F=F0=9F=8C=B2=F0=9F=8C=B4=F0=9F=99=82=E2=98=BA?= =?utf-8?q?=EF=B8=8F=F0=9F=98=83?= , "Somebody Outlook" `
+ expected := []*mail.Address{
+ {
+ Name: "Goce Test 🤦🏻♂🙈🌲☘🌴",
+ Address: "foo@bar.com",
+ },
+ {
+ Name: "Proton GMX Edit",
+ Address: "z@bar.com",
+ },
+ {
+ Name: "beta@bar.com",
+ Address: "beta@bar.com",
+ },
+ {
+ Name: "testios12",
+ Address: "random@bar.com",
+ },
+ {
+ Name: "random@bar.com",
+ Address: "random@bar.com",
+ },
+ {
+ Name: "Üäö Jakdij",
+ Address: "another@bar.com",
+ },
+ {
+ Name: "Qä Töüst 12 Edit",
+ Address: "random2@bar.com",
+ },
+ {
+ Name: "☘️🌲🌴🙂☺️😃",
+ Address: "dust@bar.com",
+ },
+ {
+ Name: "Somebody Outlook",
+ Address: "hotmal@bar.com",
+ },
+ }
+
+ addrs, err := ParseAddressList(input)
+ assert.NoError(t, err)
+ assert.ElementsMatch(t, expected, addrs)
+}
+
+// TestParserAddressEmailValidation checks, one subtest per input, that
+// ParseAddressList accepts a range of bare addr-spec forms (short and
+// multi-label domains, symbols in the local part, punycode labels) without
+// returning an error. The parsed result itself is not inspected.
+func TestParserAddressEmailValidation(t *testing.T) {
+	for _, input := range []string{
+		"test@io",
+		"test@iana.org",
+		"test@nominet.org.uk",
+		"test@about.museum",
+		"a@iana.org",
+		"test.test@iana.org",
+		"!#$%&`*+/=?^`{|}~@iana.org",
+		"123@iana.org",
+		"test@123.com",
+		"abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@iana.org",
+		"test@mason-dixon.com",
+		"test@c--n.com",
+		"test@xn--hxajbheg2az3al.xn--jxalpdlp",
+		"xn--test@iana.org",
+		"1@pm.me",
+	} {
+		// Per-iteration copy for the subtest closure.
+		input := input
+
+		t.Run(input, func(t *testing.T) {
+			_, parseErr := ParseAddressList(input)
+			assert.NoError(t, parseErr)
+		})
+	}
+}
diff --git a/rfc5322/quoted.go b/rfc5322/quoted.go
new file mode 100644
index 00000000..fd9ef9df
--- /dev/null
+++ b/rfc5322/quoted.go
@@ -0,0 +1,95 @@
+package rfc5322
+
+// 3.2.4. Quoted Strings
+
+import "github.com/ProtonMail/gluon/rfcparser"
+
+// parseQuotedString parses a quoted-string and returns its unquoted content
+// as a parserStringTypeOther value.
+//
+// Optional CFWS before the opening and after the closing double quote is
+// consumed and dropped. Between the quotes, every time tryParseFWS reports
+// folding white space a single ' ' is appended, qtext bytes are copied as they
+// are and a backslash is resolved through parseQuotedPair. The offset stored
+// in the result is that of the token current on entry, i.e. before any
+// leading CFWS has been skipped.
+func parseQuotedString(p *rfcparser.Parser) (parserString, error) {
+	var content rfcparser.Bytes
+	content.Offset = p.CurrentToken().Offset
+
+	// appendFWS consumes optional folding white space and records it as one space.
+	appendFWS := func() error {
+		found, err := tryParseFWS(p)
+		if err != nil {
+			return err
+		}
+
+		if found {
+			content.Value = append(content.Value, ' ')
+		}
+
+		return nil
+	}
+
+	if _, err := tryParseCFWS(p); err != nil {
+		return parserString{}, err
+	}
+
+	if err := p.Consume(rfcparser.TokenTypeDQuote, "expected \" for quoted string start"); err != nil {
+		return parserString{}, err
+	}
+
+body:
+	for {
+		if err := appendFWS(); err != nil {
+			return parserString{}, err
+		}
+
+		var (
+			b   byte
+			err error
+		)
+
+		switch {
+		case p.CheckWith(isQText):
+			b, err = parseQContent(p)
+		case p.Check(rfcparser.TokenTypeBackslash):
+			b, err = parseQuotedPair(p)
+		default:
+			// Neither qtext nor a quoted pair: the content is complete.
+			break body
+		}
+
+		if err != nil {
+			return parserString{}, err
+		}
+
+		content.Value = append(content.Value, b)
+	}
+
+	if err := appendFWS(); err != nil {
+		return parserString{}, err
+	}
+
+	if err := p.Consume(rfcparser.TokenTypeDQuote, "expected \" for quoted string end"); err != nil {
+		return parserString{}, err
+	}
+
+	if _, err := tryParseCFWS(p); err != nil {
+		return parserString{}, err
+	}
+
+	return parserString{String: content.IntoString(), Type: parserStringTypeOther}, nil
+}
+
+// parseQContent returns the next content byte of a quoted string: the byte of
+// the current token when isQText accepts it (the token is consumed), otherwise
+// whatever parseQuotedPair yields.
+func parseQContent(p *rfcparser.Parser) (byte, error) {
+	matched, err := p.MatchesWith(isQText)
+
+	switch {
+	case err != nil:
+		return 0, err
+	case matched:
+		return p.PreviousToken().Value, nil
+	default:
+		return parseQuotedPair(p)
+	}
+}
+
+// isQText reports whether tokenType may appear unescaped inside a quoted
+// string.
+//
+//	qtext           =   %d33 /             ; Printable US-ASCII
+//	                    %d35-91 /          ;  characters not including
+//	                    %d93-126 /         ;  "\" or the quote character
+//	                    obs-qtext
+//
+//	obs-qtext       =   obs-NO-WS-CTL
+func isQText(tokenType rfcparser.TokenType) bool {
+	switch tokenType { // nolint:exhaustive
+	case rfcparser.TokenTypeDQuote,
+		rfcparser.TokenTypeBackslash,
+		rfcparser.TokenTypeSP,
+		rfcparser.TokenTypeEOF,
+		rfcparser.TokenTypeError:
+		return false
+	}
+
+	// A control token only qualifies when it is part of obs-NO-WS-CTL.
+	return !rfcparser.IsCTL(tokenType) || isObsNoWSCTL(tokenType)
+}
diff --git a/rfc5322/quoted_test.go b/rfc5322/quoted_test.go
new file mode 100644
index 00000000..a66a1eaf
--- /dev/null
+++ b/rfc5322/quoted_test.go
@@ -0,0 +1,22 @@
+package rfc5322
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+// TestQuotedString maps raw quoted strings (with escapes, folding and
+// surrounding white space) to the content parseQuotedString is expected to
+// return for them.
+func TestQuotedString(t *testing.T) {
+	cases := map[string]string{
+		`"f\".c"`:                   "f\".c",
+		"\" \r\n f\\\".c\r\n \"": " f\".c ",
+		` " foo bar derer " `:       " foo bar derer ",
+	}
+
+	for input, want := range cases {
+		got, err := parseQuotedString(newTestRFCParser(input))
+		require.NoError(t, err)
+		require.Equal(t, want, got.String.Value)
+	}
+}
diff --git a/rfcparser/parser.go b/rfcparser/parser.go
index 3623f954..ddde24cf 100644
--- a/rfcparser/parser.go
+++ b/rfcparser/parser.go
@@ -57,6 +57,11 @@ func IsError(err error) bool {
return errors.As(err, &perr)
}
+type ParserState struct { // snapshot of a Parser's two tokens, produced by SaveState and consumed by RestoreState
+ prevToken Token // the parser's previousToken at save time
+ curToken Token // the parser's currentToken at save time
+}
+
func NewParser(s *Scanner) *Parser {
return &Parser{scanner: s}
}
@@ -476,6 +481,22 @@ func (p *Parser) MakeErrorAtOffset(err string, offset int) error {
}
}
+// SaveState returns a snapshot of the previous and current tokens that can later be handed to RestoreState; only these two tokens are stored in the snapshot.
+func (p *Parser) SaveState() ParserState {
+ return ParserState{
+ prevToken: p.previousToken,
+ curToken: p.currentToken,
+ }
+}
+
+// RestoreState overwrites the parser's previous and current tokens with the ones recorded in state.
+// NOTE: Only the tokens are restored. The caller must also rewind the scanner input to the location
+// where state was recorded, otherwise parsing can break.
+func (p *Parser) RestoreState(state ParserState) {
+ p.previousToken = state.prevToken
+ p.currentToken = state.curToken
+}
+
func IsAStringChar(tokenType TokenType) bool {
/*
ASTRING-CHAR = ATOM-CHAR / resp-specials
@@ -519,7 +540,7 @@ func IsQuotedChar(tokenType TokenType) bool {
}
func IsCTL(tokenType TokenType) bool {
- return tokenType == TokenTypeCTL || tokenType == TokenTypeCR || tokenType == TokenTypeLF
+ return tokenType == TokenTypeCTL || tokenType == TokenTypeCR || tokenType == TokenTypeLF || tokenType == TokenTypeTab // CR, LF and Tab get dedicated token types from the scanner but still count as control characters
}
func ByteToInt(b byte) int {
diff --git a/rfcparser/scanner.go b/rfcparser/scanner.go
index bffdfa43..7cb999b7 100644
--- a/rfcparser/scanner.go
+++ b/rfcparser/scanner.go
@@ -51,6 +51,9 @@ const (
TokenTypeCR
TokenTypeLF
TokenTypeCTL
+ TokenTypeTab
+ TokenTypeDelete
+ TokenTypeZero
)
type Token struct {
@@ -128,13 +131,18 @@ func (s *Scanner) ScanToken() (Token, error) {
}
if isByteCTL(b) {
- if b == '\r' {
+ switch b {
+ case 0x0:
+ return s.makeToken(TokenTypeZero), nil
+ case '\r':
return s.makeToken(TokenTypeCR), nil
- } else if b == '\n' {
+ case '\n':
return s.makeToken(TokenTypeLF), nil
+ case '\t':
+ return s.makeToken(TokenTypeTab), nil
+ default:
+ return s.makeToken(TokenTypeCTL), nil
}
-
- return s.makeToken(TokenTypeCTL), nil
}
switch b {
@@ -204,6 +212,8 @@ func (s *Scanner) ScanToken() (Token, error) {
return s.makeToken(TokenTypeRCurly), nil
case '~':
return s.makeToken(TokenTypeTilde), nil
+ case 0x7F:
+ return s.makeToken(TokenTypeDelete), nil
}
return Token{}, fmt.Errorf("unexpected character %v", b)