From 255566c445dfae1bc26f6b25840bd7f355235d0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Fri, 23 Apr 2021 10:35:43 +0200 Subject: [PATCH] New option in `filestream`: `include_files` && check after symlink is resolved (#25080) (#25244) ## What does this PR do? This PR adds support for a new option in the `filestream` input named `include_files`. This option is the counterpart of `exclude_files`: it expects a list of regexes and includes only the files that match at least one of them. The PR also adds one more check: after symlinks are resolved, the path of the original (resolved) file is tested again against the regexes in `include_files` and `exclude_files`. ## Why is it important? `exclude_files` alone was not enough to keep unwanted files out: there was no way to allow-list files, and a symlink whose own path passed the filters could still point to an unwanted file. (cherry picked from commit dfb98b21806b7faef7df7ebd18f51833b1142521) --- .../config/filebeat.inputs.reference.yml.tmpl | 4 ++ filebeat/filebeat.reference.yml | 4 ++ filebeat/input/filestream/fswatch.go | 24 ++++++++++- filebeat/input/filestream/fswatch_test.go | 9 ++++ .../filestream/fswatch_test_non_windows.go | 42 +++++++++++++++---- x-pack/filebeat/filebeat.reference.yml | 4 ++ 6 files changed, 78 insertions(+), 9 deletions(-) diff --git a/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl b/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl index bd24f2b0a6e..4986887155d 100644 --- a/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl +++ b/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl @@ -281,6 +281,10 @@ filebeat.inputs: # are matching any regular expression from the list. By default, no files are dropped. #prospector.scanner.exclude_files: ['.gz$'] + # Include files. A list of regular expressions to match. Filebeat keeps only the files that + # are matching any regular expression from the list. By default, no files are dropped. + #prospector.scanner.include_files: ['/var/log/.*'] + # Expand "**" patterns into regular glob patterns. 
#prospector.scanner.recursive_glob: true diff --git a/filebeat/filebeat.reference.yml b/filebeat/filebeat.reference.yml index 9872e580e0f..ef01f8127e5 100644 --- a/filebeat/filebeat.reference.yml +++ b/filebeat/filebeat.reference.yml @@ -688,6 +688,10 @@ filebeat.inputs: # are matching any regular expression from the list. By default, no files are dropped. #prospector.scanner.exclude_files: ['.gz$'] + # Include files. A list of regular expressions to match. Filebeat keeps only the files that + # are matching any regular expression from the list. By default, no files are dropped. + #prospector.scanner.include_files: ['/var/log/.*'] + # Expand "**" patterns into regular glob patterns. #prospector.scanner.recursive_glob: true diff --git a/filebeat/input/filestream/fswatch.go b/filebeat/input/filestream/fswatch.go index 19614063db8..822763de022 100644 --- a/filebeat/input/filestream/fswatch.go +++ b/filebeat/input/filestream/fswatch.go @@ -51,6 +51,7 @@ type watcherFactory func(paths []string, cfg *common.Config) (loginp.FSWatcher, type fileScanner struct { paths []string excludedFiles []match.Matcher + includedFiles []match.Matcher symlinks bool log *logp.Logger @@ -234,6 +235,7 @@ func (w *fileWatcher) GetFiles() map[string]os.FileInfo { type fileScannerConfig struct { ExcludedFiles []match.Matcher `config:"exclude_files"` + IncludedFiles []match.Matcher `config:"include_files"` Symlinks bool `config:"symlinks"` RecursiveGlob bool `config:"recursive_glob"` } @@ -249,6 +251,7 @@ func newFileScanner(paths []string, cfg fileScannerConfig) (loginp.FSScanner, er fs := fileScanner{ paths: paths, excludedFiles: cfg.ExcludedFiles, + includedFiles: cfg.IncludedFiles, symlinks: cfg.Symlinks, log: logp.NewLogger(scannerName), } @@ -337,7 +340,7 @@ func (s *fileScanner) GetFiles() map[string]os.FileInfo { } func (s *fileScanner) shouldSkipFile(file string) bool { - if s.isFileExcluded(file) { + if s.isFileExcluded(file) || !s.isFileIncluded(file) { s.log.Debugf("Exclude file: 
%s", file) return true } @@ -359,6 +362,18 @@ func (s *fileScanner) shouldSkipFile(file string) bool { return true } + originalFile, err := filepath.EvalSymlinks(file) + if err != nil { + s.log.Debugf("finding path to original file has failed %s: %+v", file, err) + return true + } + // Check if original file is included to make sure we are not reading from + // unwanted files. + if s.isFileExcluded(originalFile) || !s.isFileIncluded(originalFile) { + s.log.Debugf("Exclude original file: %s", file) + return true + } + return false } @@ -384,6 +399,13 @@ func (s *fileScanner) isFileExcluded(file string) bool { return len(s.excludedFiles) > 0 && s.matchAny(s.excludedFiles, file) } +func (s *fileScanner) isFileIncluded(file string) bool { + if len(s.includedFiles) == 0 { + return true + } + return s.matchAny(s.includedFiles, file) +} + // matchAny checks if the text matches any of the regular expressions func (s *fileScanner) matchAny(matchers []match.Matcher, text string) bool { for _, m := range matchers { diff --git a/filebeat/input/filestream/fswatch_test.go b/filebeat/input/filestream/fswatch_test.go index 535865a3f47..c2c01a53da2 100644 --- a/filebeat/input/filestream/fswatch_test.go +++ b/filebeat/input/filestream/fswatch_test.go @@ -52,6 +52,7 @@ func TestFileScanner(t *testing.T) { testCases := map[string]struct { paths []string excludedFiles []match.Matcher + includedFiles []match.Matcher symlinks bool expectedFiles []string }{ @@ -66,6 +67,13 @@ func TestFileScanner(t *testing.T) { }, expectedFiles: []string{includedFilePath}, }, + "only include included_files": { + paths: []string{excludedFilePath, includedFilePath}, + includedFiles: []match.Matcher{ + match.MustCompile(includedFileName), + }, + expectedFiles: []string{includedFilePath}, + }, "skip directories": { paths: []string{filepath.Join(tmpDir, directoryPath)}, expectedFiles: []string{}, @@ -78,6 +86,7 @@ func TestFileScanner(t *testing.T) { t.Run(name, func(t *testing.T) { cfg := fileScannerConfig{ 
ExcludedFiles: test.excludedFiles, + IncludedFiles: test.includedFiles, Symlinks: test.symlinks, RecursiveGlob: false, } diff --git a/filebeat/input/filestream/fswatch_test_non_windows.go b/filebeat/input/filestream/fswatch_test_non_windows.go index a07db4cbf18..8c3cbd03c71 100644 --- a/filebeat/input/filestream/fswatch_test_non_windows.go +++ b/filebeat/input/filestream/fswatch_test_non_windows.go @@ -24,6 +24,7 @@ import ( "io/ioutil" "os" "path/filepath" + "strconv" "testing" "github.com/stretchr/testify/assert" @@ -44,13 +45,14 @@ func TestFileScannerSymlinks(t *testing.T) { testCases := map[string]struct { paths []string excludedFiles []match.Matcher + includedFiles []match.Matcher symlinks bool expectedFiles []string }{ // covers test_input.py/test_skip_symlinks "skip symlinks": { paths: []string{ - filepath.Join(tmpDir, "symlink_to_included_file"), + filepath.Join(tmpDir, "symlink_to_0"), filepath.Join(tmpDir, "included_file"), }, symlinks: false, @@ -60,7 +62,7 @@ func TestFileScannerSymlinks(t *testing.T) { }, "return a file once if symlinks are enabled": { paths: []string{ - filepath.Join(tmpDir, "symlink_to_included_file"), + filepath.Join(tmpDir, "symlink_to_0"), filepath.Join(tmpDir, "included_file"), }, symlinks: true, @@ -68,14 +70,29 @@ func TestFileScannerSymlinks(t *testing.T) { mustAbsPath(filepath.Join(tmpDir, "included_file")), }, }, + "do not return symlink if original file is not allowed": { + paths: []string{ + filepath.Join(tmpDir, "symlink_to_1"), + filepath.Join(tmpDir, "included_file"), + }, + excludedFiles: []match.Matcher{ + match.MustCompile("original_" + excludedFileName), + }, + symlinks: true, + expectedFiles: []string{ + mustAbsPath(filepath.Join(tmpDir, "included_file")), + }, + }, } - err := os.Symlink( - mustAbsPath(filepath.Join(tmpDir, "included_file")), - mustAbsPath(filepath.Join(tmpDir, "symlink_to_included_file")), - ) - if err != nil { - t.Fatal(err) + for i, filename := range []string{"included_file", "excluded_file"} { 
+ err := os.Symlink( + mustAbsPath(filepath.Join(tmpDir, "original_"+filename)), + mustAbsPath(filepath.Join(tmpDir, "symlink_to_"+strconv.Itoa(i))), + ) + if err != nil { + t.Fatal(err) + } } for name, test := range testCases { @@ -84,6 +101,7 @@ func TestFileScannerSymlinks(t *testing.T) { t.Run(name, func(t *testing.T) { cfg := fileScannerConfig{ ExcludedFiles: test.excludedFiles, + IncludedFiles: test.includedFiles, Symlinks: true, RecursiveGlob: false, } @@ -150,3 +168,11 @@ func TestFileWatcherRenamedFile(t *testing.T) { assert.Equal(t, testPath, evt.OldPath) assert.Equal(t, renamedPath, evt.NewPath) } + +func mustAbsPath(filename string) string { + abspath, err := filepath.Abs(filename) + if err != nil { + panic(err) + } + return abspath +} diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml index 2371dc84d7d..df9588dac50 100644 --- a/x-pack/filebeat/filebeat.reference.yml +++ b/x-pack/filebeat/filebeat.reference.yml @@ -2694,6 +2694,10 @@ filebeat.inputs: # are matching any regular expression from the list. By default, no files are dropped. #prospector.scanner.exclude_files: ['.gz$'] + # Include files. A list of regular expressions to match. Filebeat keeps only the files that + # are matching any regular expression from the list. By default, no files are dropped. + #prospector.scanner.include_files: ['/var/log/.*'] + # Expand "**" patterns into regular glob patterns. #prospector.scanner.recursive_glob: true