Skip to content

Commit

Permalink
fix(python-packaging): skip MIME protocol error
Browse files Browse the repository at this point in the history
Signed-off-by: knqyf263 <knqyf263@gmail.com>
Co-authored-by: knqyf263 <knqyf263@gmail.com>
  • Loading branch information
DmitriyLewen and knqyf263 authored Oct 5, 2023
1 parent 48d70d5 commit 1237b47
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 3 deletions.
19 changes: 16 additions & 3 deletions pkg/python/packaging/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@ package packaging

import (
"bufio"
"errors"
"io"
"net/textproto"
"strings"

"golang.org/x/xerrors"

dio "github.com/aquasecurity/go-dep-parser/pkg/io"
"github.com/aquasecurity/go-dep-parser/pkg/log"
"github.com/aquasecurity/go-dep-parser/pkg/types"
)

Expand All @@ -23,10 +25,21 @@ func NewParser() types.Parser {
func (*Parser) Parse(r dio.ReadSeekerAt) ([]types.Library, []types.Dependency, error) {
rd := textproto.NewReader(bufio.NewReader(r))
h, err := rd.ReadMIMEHeader()
if err != nil && err != io.EOF {
if e := textproto.ProtocolError(""); errors.As(err, &e) {
// A MIME header may contain bytes in the key or value outside the set allowed by RFC 7230.
// cf. https://cs.opensource.google/go/go/+/a6642e67e16b9d769a0c08e486ba08408064df19
// However, the keys and values we need may still have been parsed correctly,
// so we log the error and continue processing.
log.Logger.Debugf("MIME protocol error: %s", err)
} else if err != nil && err != io.EOF {
return nil, nil, xerrors.Errorf("read MIME error: %w", err)
}

name, version := h.Get("name"), h.Get("version")
if name == "" || version == "" {
return nil, nil, xerrors.New("name or version is empty")
}

// "License-Expression" takes precedence as "License" is deprecated.
// cf. https://peps.python.org/pep-0639/#deprecate-license-field
var license string
Expand All @@ -49,8 +62,8 @@ func (*Parser) Parse(r dio.ReadSeekerAt) ([]types.Library, []types.Dependency, e

return []types.Library{
{
Name: h.Get("Name"),
Version: h.Get("Version"),
Name: name,
Version: version,
License: license,
},
}, nil, nil
Expand Down
11 changes: 11 additions & 0 deletions pkg/python/packaging/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,17 @@ func TestParse(t *testing.T) {
// tr "\n" "\t" | awk -F "\t" '{printf("\{\""$1"\", \""$2"\", \""$3"\"\}\n")}'
want: []types.Library{{Name: "setuptools", Version: "51.3.3", License: "UNKNOWN"}},
},
{
name: "egg PKG-INFO with description containing non-RFC 7230 bytes",
input: "testdata/unidecode-egg-info.PKG-INFO",
want: []types.Library{
{
Name: "Unidecode",
Version: "0.4.1",
License: "UNKNOWN",
},
},
},
{
name: "egg-info",
input: "testdata/distlib-0.3.1-py3.9.egg-info",
Expand Down
47 changes: 47 additions & 0 deletions pkg/python/packaging/testdata/unidecode-egg-info.PKG-INFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
Metadata-Version: 1.1
Name: Unidecode
Version: 0.4.1
Summary: US-ASCII transliterations of Unicode text
Home-page: http://www.tablix.org/~avian/blog/archives/2009/01/unicode_transliteration_in_python/
Author: Tomaz Solc
Author-email: tomaz.solc@tablix.org
License: UNKNOWN
Description:
Unidecode
=========

ASCII transliterations of Unicode text

Example Use
-----------

::

from unidecode import unidecode
print unidecode(u"北亰")

# That prints: Bei Jing

Description
-----------

It often happens that you have non-Roman text data in Unicode, but
you can't display it -- usually because you're trying to show it
to a user via an application that doesn't support Unicode, or
because the fonts you need aren't accessible. You could represent
the Unicode characters as "???????" or "BAA0q0...", but
that's nearly useless to the user who actually wants to read what
the text says.

What Unidecode provides is a function, 'unidecode(...)' that
takes Unicode data and tries to represent it in ASCII characters
(i.e., the universally displayable characters between 0x00 and 0x7F).
The representation is almost always an attempt at *transliteration*
-- i.e., conveying, in Roman letters, the pronunciation expressed by
the text in some other writing system. (See the example above)

This is a Python port of Text::Unidecode Perl module by
Sean M. Burke <sburke@cpan.org>.

Platform: UNKNOWN
Provides: unidecode
Expand Down

0 comments on commit 1237b47

Please sign in to comment.