Skip to content

Commit

Permalink
🐛 Limit Binary Artifact file reads to first 1024 bytes (#3923)
Browse files Browse the repository at this point in the history
* add OnMatchingFileReaderDo

Signed-off-by: Spencer Schrock <sschrock@google.com>

* switch binary artifact to using reader

Signed-off-by: Spencer Schrock <sschrock@google.com>

---------

Signed-off-by: Spencer Schrock <sschrock@google.com>
  • Loading branch information
spencerschrock committed Mar 6, 2024
1 parent 0a6b06a commit c779392
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 15 deletions.
46 changes: 39 additions & 7 deletions checks/fileparser/listing.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,21 @@ type PathMatcher struct {
CaseSensitive bool
}

// DoWhileTrueOnFileReader takes a filepath, its reader and
// optional variadic args. It returns a boolean indicating whether
// iterating over next files should continue.
type DoWhileTrueOnFileReader func(path string, reader io.Reader, args ...interface{}) (bool, error)

// OnMatchingFileReaderDo matches all files listed by `repoClient` against `matchPathTo`
// and on every successful match, runs onFileReader fn on the file's reader.
// Continues iterating along the matched files until onFileReader returns
// either a false value or an error.
func OnMatchingFileReaderDo(repoClient clients.RepoClient, matchPathTo PathMatcher,
onFileReader DoWhileTrueOnFileReader, args ...interface{},
) error {
return onMatchingFileDo(repoClient, matchPathTo, onFileReader, args...)
}

// DoWhileTrueOnFileContent takes a filepath, its content and
// optional variadic args. It returns a boolean indicating whether
// iterating over next files should continue.
Expand All @@ -75,6 +90,12 @@ type DoWhileTrueOnFileContent func(path string, content []byte, args ...interfac
// either a false value or an error.
func OnMatchingFileContentDo(repoClient clients.RepoClient, matchPathTo PathMatcher,
onFileContent DoWhileTrueOnFileContent, args ...interface{},
) error {
return onMatchingFileDo(repoClient, matchPathTo, onFileContent, args...)
}

func onMatchingFileDo(repoClient clients.RepoClient, matchPathTo PathMatcher,
onFile any, args ...interface{},
) error {
predicate := func(filepath string) (bool, error) {
// Filter out test files.
Expand All @@ -95,17 +116,28 @@ func OnMatchingFileContentDo(repoClient clients.RepoClient, matchPathTo PathMatc
}

for _, file := range matchedFiles {
rc, err := repoClient.GetFileReader(file)
reader, err := repoClient.GetFileReader(file)
if err != nil {
return fmt.Errorf("error during GetFileReader: %w", err)
}
content, err := io.ReadAll(rc)
rc.Close()
if err != nil {
return fmt.Errorf("reading from file: %w", err)
}

continueIter, err := onFileContent(file, content, args...)
var continueIter bool
switch f := onFile.(type) {
case DoWhileTrueOnFileReader:
continueIter, err = f(file, reader, args...)
reader.Close()
case DoWhileTrueOnFileContent:
var content []byte
content, err = io.ReadAll(reader)
reader.Close()
if err != nil {
return fmt.Errorf("reading from file: %w", err)
}
continueIter, err = f(file, content, args...)
default:
msg := fmt.Sprintf("invalid type (%T) passed to onMatchingFileDo", f)
return sce.WithMessage(sce.ErrScorecardInternal, msg)
}
if err != nil {
return err
}
Expand Down
25 changes: 17 additions & 8 deletions checks/raw/binary_artifact.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package raw
import (
"errors"
"fmt"
"io"
"path/filepath"
"regexp"
"strings"
Expand All @@ -39,6 +40,9 @@ var (
gradleWrapperValidationActionVersionConstraint = mustParseConstraint(`>= 1.0.0`)
)

// how many bytes are considered when determining if a file is text or binary.
const binaryTestLen = 1024

// mustParseConstraint attempts parse of semver constraint, panics if fail.
func mustParseConstraint(c string) *semver.Constraints {
if c, err := semver.NewConstraint(c); err != nil {
Expand All @@ -52,10 +56,10 @@ func mustParseConstraint(c string) *semver.Constraints {
func BinaryArtifacts(req *checker.CheckRequest) (checker.BinaryArtifactData, error) {
c := req.RepoClient
files := []checker.File{}
err := fileparser.OnMatchingFileContentDo(c, fileparser.PathMatcher{
err := fileparser.OnMatchingFileReaderDo(c, fileparser.PathMatcher{
Pattern: "*",
CaseSensitive: false,
}, checkBinaryFileContent, &files)
}, checkBinaryFileReader, &files)
if err != nil {
return checker.BinaryArtifactData{}, fmt.Errorf("%w", err)
}
Expand Down Expand Up @@ -96,17 +100,17 @@ func excludeValidatedGradleWrappers(c clients.RepoClient, files []checker.File)
return files, nil
}

var checkBinaryFileContent fileparser.DoWhileTrueOnFileContent = func(path string, content []byte,
var checkBinaryFileReader fileparser.DoWhileTrueOnFileReader = func(path string, reader io.Reader,
args ...interface{},
) (bool, error) {
if len(args) != 1 {
return false, fmt.Errorf(
"checkBinaryFileContent requires exactly one argument: %w", errInvalidArgLength)
"checkBinaryFileReader requires exactly one argument: %w", errInvalidArgLength)
}
pfiles, ok := args[0].(*[]checker.File)
if !ok {
return false, fmt.Errorf(
"checkBinaryFileContent requires argument of type *[]checker.File: %w", errInvalidArgType)
"checkBinaryFileReader requires argument of type *[]checker.File: %w", errInvalidArgType)
}

binaryFileTypes := map[string]bool{
Expand Down Expand Up @@ -138,8 +142,13 @@ var checkBinaryFileContent fileparser.DoWhileTrueOnFileContent = func(path strin
"wasm": true,
"whl": true,
}

content, err := io.ReadAll(io.LimitReader(reader, binaryTestLen))
if err != nil {
return false, fmt.Errorf("reading file: %w", err)
}

var t types.Type
var err error
if len(content) == 0 {
return true, nil
}
Expand Down Expand Up @@ -169,12 +178,12 @@ var checkBinaryFileContent fileparser.DoWhileTrueOnFileContent = func(path strin
return true, nil
}

// determines if the first 1024 bytes are text
// determines if the first binaryTestLen bytes are text
//
// A version of golang.org/x/tools/godoc/util modified to allow carriage returns
// and utf8.RuneError (0xFFFD), as the file may not be utf8 encoded.
func isText(s []byte) bool {
const max = 1024 // at least utf8.UTFMax
const max = binaryTestLen // at least utf8.UTFMax (4)
if len(s) > max {
s = s[0:max]
}
Expand Down

0 comments on commit c779392

Please sign in to comment.