Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New option in filestream: include_files && check after symlink is resolved #25080

Merged
merged 3 commits into from
Apr 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,10 @@ filebeat.inputs:
# are matching any regular expression from the list. By default, no files are dropped.
#prospector.scanner.exclude_files: ['.gz$']

# Include files. A list of regular expressions to match. Filebeat keeps only the files that
# match at least one regular expression from the list. By default, all files are kept.
#prospector.scanner.include_files: ['/var/log/.*']

# Expand "**" patterns into regular glob patterns.
#prospector.scanner.recursive_glob: true

Expand Down
4 changes: 4 additions & 0 deletions filebeat/filebeat.reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,10 @@ filebeat.inputs:
# are matching any regular expression from the list. By default, no files are dropped.
#prospector.scanner.exclude_files: ['.gz$']

# Include files. A list of regular expressions to match. Filebeat keeps only the files that
# match at least one regular expression from the list. By default, all files are kept.
#prospector.scanner.include_files: ['/var/log/.*']

# Expand "**" patterns into regular glob patterns.
#prospector.scanner.recursive_glob: true

Expand Down
24 changes: 23 additions & 1 deletion filebeat/input/filestream/fswatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ type watcherFactory func(paths []string, cfg *common.Config) (loginp.FSWatcher,
type fileScanner struct {
paths []string
excludedFiles []match.Matcher
includedFiles []match.Matcher
symlinks bool

log *logp.Logger
Expand Down Expand Up @@ -234,6 +235,7 @@ func (w *fileWatcher) GetFiles() map[string]os.FileInfo {

// fileScannerConfig holds the settings of the file scanner. Each field is
// bound to the configuration key named in its `config` struct tag.
type fileScannerConfig struct {
// ExcludedFiles is the list of matchers from `exclude_files`; a file whose
// path matches any of them is skipped by the scanner.
ExcludedFiles []match.Matcher `config:"exclude_files"`
// IncludedFiles is the list of matchers from `include_files`; when it is
// non-empty, only files whose path matches at least one of them are kept.
// An empty list means no include filtering is applied.
IncludedFiles []match.Matcher `config:"include_files"`
// Symlinks is read from `symlinks`.
// NOTE(review): presumably controls whether symlinks are followed; the
// handling code is not visible in this section — confirm.
Symlinks bool `config:"symlinks"`
// RecursiveGlob is read from `recursive_glob`; the reference configuration
// describes it as expanding "**" patterns into regular glob patterns.
RecursiveGlob bool `config:"recursive_glob"`
}
Expand All @@ -249,6 +251,7 @@ func newFileScanner(paths []string, cfg fileScannerConfig) (loginp.FSScanner, er
fs := fileScanner{
paths: paths,
excludedFiles: cfg.ExcludedFiles,
includedFiles: cfg.IncludedFiles,
symlinks: cfg.Symlinks,
log: logp.NewLogger(scannerName),
}
Expand Down Expand Up @@ -337,7 +340,7 @@ func (s *fileScanner) GetFiles() map[string]os.FileInfo {
}

func (s *fileScanner) shouldSkipFile(file string) bool {
if s.isFileExcluded(file) {
if s.isFileExcluded(file) || !s.isFileIncluded(file) {
s.log.Debugf("Exclude file: %s", file)
return true
}
Expand All @@ -359,6 +362,18 @@ func (s *fileScanner) shouldSkipFile(file string) bool {
return true
}

originalFile, err := filepath.EvalSymlinks(file)
if err != nil {
s.log.Debugf("finding path to original file has failed %s: %+v", file, err)
return true
}
// Check if original file is included to make sure we are not reading from
// unwanted files.
if s.isFileExcluded(originalFile) || !s.isFileIncluded(originalFile) {
kvch marked this conversation as resolved.
Show resolved Hide resolved
s.log.Debugf("Exclude original file: %s", file)
return true
}

return false
}

Expand All @@ -384,6 +399,13 @@ func (s *fileScanner) isFileExcluded(file string) bool {
return len(s.excludedFiles) > 0 && s.matchAny(s.excludedFiles, file)
}

// isFileIncluded reports whether file passes the include filter. When no
// include matchers are configured every file passes; otherwise the path has
// to match at least one of the configured matchers.
func (s *fileScanner) isFileIncluded(file string) bool {
	patterns := s.includedFiles
	return len(patterns) == 0 || s.matchAny(patterns, file)
}

// matchAny checks if the text matches any of the regular expressions
func (s *fileScanner) matchAny(matchers []match.Matcher, text string) bool {
for _, m := range matchers {
Expand Down
9 changes: 9 additions & 0 deletions filebeat/input/filestream/fswatch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ func TestFileScanner(t *testing.T) {
testCases := map[string]struct {
paths []string
excludedFiles []match.Matcher
includedFiles []match.Matcher
symlinks bool
expectedFiles []string
}{
Expand All @@ -66,6 +67,13 @@ func TestFileScanner(t *testing.T) {
},
expectedFiles: []string{includedFilePath},
},
"only include included_files": {
paths: []string{excludedFilePath, includedFilePath},
includedFiles: []match.Matcher{
match.MustCompile(includedFileName),
},
expectedFiles: []string{includedFilePath},
},
"skip directories": {
paths: []string{filepath.Join(tmpDir, directoryPath)},
expectedFiles: []string{},
Expand All @@ -78,6 +86,7 @@ func TestFileScanner(t *testing.T) {
t.Run(name, func(t *testing.T) {
cfg := fileScannerConfig{
ExcludedFiles: test.excludedFiles,
IncludedFiles: test.includedFiles,
Symlinks: test.symlinks,
RecursiveGlob: false,
}
Expand Down
42 changes: 34 additions & 8 deletions filebeat/input/filestream/fswatch_test_non_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"testing"

"github.com/stretchr/testify/assert"
Expand All @@ -44,13 +45,14 @@ func TestFileScannerSymlinks(t *testing.T) {
testCases := map[string]struct {
paths []string
excludedFiles []match.Matcher
includedFiles []match.Matcher
symlinks bool
expectedFiles []string
}{
// covers test_input.py/test_skip_symlinks
"skip symlinks": {
paths: []string{
filepath.Join(tmpDir, "symlink_to_included_file"),
filepath.Join(tmpDir, "symlink_to_0"),
filepath.Join(tmpDir, "included_file"),
},
symlinks: false,
Expand All @@ -60,22 +62,37 @@ func TestFileScannerSymlinks(t *testing.T) {
},
"return a file once if symlinks are enabled": {
paths: []string{
filepath.Join(tmpDir, "symlink_to_included_file"),
filepath.Join(tmpDir, "symlink_to_0"),
filepath.Join(tmpDir, "included_file"),
},
symlinks: true,
expectedFiles: []string{
mustAbsPath(filepath.Join(tmpDir, "included_file")),
},
},
"do not return symlink if original file is not allowed": {
paths: []string{
filepath.Join(tmpDir, "symlink_to_1"),
filepath.Join(tmpDir, "included_file"),
},
excludedFiles: []match.Matcher{
match.MustCompile("original_" + excludedFileName),
},
symlinks: true,
expectedFiles: []string{
mustAbsPath(filepath.Join(tmpDir, "included_file")),
},
},
}

err := os.Symlink(
mustAbsPath(filepath.Join(tmpDir, "included_file")),
mustAbsPath(filepath.Join(tmpDir, "symlink_to_included_file")),
)
if err != nil {
t.Fatal(err)
for i, filename := range []string{"included_file", "excluded_file"} {
err := os.Symlink(
mustAbsPath(filepath.Join(tmpDir, "original_"+filename)),
mustAbsPath(filepath.Join(tmpDir, "symlink_to_"+strconv.Itoa(i))),
)
if err != nil {
t.Fatal(err)
}
}

for name, test := range testCases {
Expand All @@ -84,6 +101,7 @@ func TestFileScannerSymlinks(t *testing.T) {
t.Run(name, func(t *testing.T) {
cfg := fileScannerConfig{
ExcludedFiles: test.excludedFiles,
IncludedFiles: test.includedFiles,
Symlinks: true,
RecursiveGlob: false,
}
Expand Down Expand Up @@ -150,3 +168,11 @@ func TestFileWatcherRenamedFile(t *testing.T) {
assert.Equal(t, testPath, evt.OldPath)
assert.Equal(t, renamedPath, evt.NewPath)
}

// mustAbsPath returns the absolute form of filename as computed by
// filepath.Abs. It panics with the underlying error if the path cannot be
// made absolute, so it is only suitable for test setup code.
func mustAbsPath(filename string) string {
	resolved, err := filepath.Abs(filename)
	if err == nil {
		return resolved
	}
	panic(err)
}
4 changes: 4 additions & 0 deletions x-pack/filebeat/filebeat.reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2692,6 +2692,10 @@ filebeat.inputs:
# are matching any regular expression from the list. By default, no files are dropped.
#prospector.scanner.exclude_files: ['.gz$']

# Include files. A list of regular expressions to match. Filebeat keeps only the files that
# match at least one regular expression from the list. By default, all files are kept.
#prospector.scanner.include_files: ['/var/log/.*']

# Expand "**" patterns into regular glob patterns.
#prospector.scanner.recursive_glob: true

Expand Down