diff --git a/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl b/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl index bd24f2b0a6e..4986887155d 100644 --- a/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl +++ b/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl @@ -281,6 +281,10 @@ filebeat.inputs: # are matching any regular expression from the list. By default, no files are dropped. #prospector.scanner.exclude_files: ['.gz$'] + # Include files. A list of regular expressions to match. Filebeat keeps only the files that + # match any regular expression from the list. By default, all files are included. + #prospector.scanner.include_files: ['/var/log/.*'] + # Expand "**" patterns into regular glob patterns. #prospector.scanner.recursive_glob: true diff --git a/filebeat/filebeat.reference.yml b/filebeat/filebeat.reference.yml index ca22a00b786..287cbc073a8 100644 --- a/filebeat/filebeat.reference.yml +++ b/filebeat/filebeat.reference.yml @@ -688,6 +688,10 @@ filebeat.inputs: # are matching any regular expression from the list. By default, no files are dropped. #prospector.scanner.exclude_files: ['.gz$'] + # Include files. A list of regular expressions to match. Filebeat keeps only the files that + # match any regular expression from the list. By default, all files are included. + #prospector.scanner.include_files: ['/var/log/.*'] + # Expand "**" patterns into regular glob patterns. 
#prospector.scanner.recursive_glob: true diff --git a/filebeat/input/filestream/fswatch.go b/filebeat/input/filestream/fswatch.go index 19614063db8..822763de022 100644 --- a/filebeat/input/filestream/fswatch.go +++ b/filebeat/input/filestream/fswatch.go @@ -51,6 +51,7 @@ type watcherFactory func(paths []string, cfg *common.Config) (loginp.FSWatcher, type fileScanner struct { paths []string excludedFiles []match.Matcher + includedFiles []match.Matcher symlinks bool log *logp.Logger @@ -234,6 +235,7 @@ func (w *fileWatcher) GetFiles() map[string]os.FileInfo { type fileScannerConfig struct { ExcludedFiles []match.Matcher `config:"exclude_files"` + IncludedFiles []match.Matcher `config:"include_files"` Symlinks bool `config:"symlinks"` RecursiveGlob bool `config:"recursive_glob"` } @@ -249,6 +251,7 @@ func newFileScanner(paths []string, cfg fileScannerConfig) (loginp.FSScanner, er fs := fileScanner{ paths: paths, excludedFiles: cfg.ExcludedFiles, + includedFiles: cfg.IncludedFiles, symlinks: cfg.Symlinks, log: logp.NewLogger(scannerName), } @@ -337,7 +340,7 @@ func (s *fileScanner) GetFiles() map[string]os.FileInfo { } func (s *fileScanner) shouldSkipFile(file string) bool { - if s.isFileExcluded(file) { + if s.isFileExcluded(file) || !s.isFileIncluded(file) { s.log.Debugf("Exclude file: %s", file) return true } @@ -359,6 +362,18 @@ func (s *fileScanner) shouldSkipFile(file string) bool { return true } + originalFile, err := filepath.EvalSymlinks(file) + if err != nil { + s.log.Debugf("finding path to original file has failed %s: %+v", file, err) + return true + } + // Check if original file is included to make sure we are not reading from + // unwanted files. 
+ if s.isFileExcluded(originalFile) || !s.isFileIncluded(originalFile) { + s.log.Debugf("Exclude original file: %s", file) + return true + } + return false } @@ -384,6 +399,13 @@ func (s *fileScanner) isFileExcluded(file string) bool { return len(s.excludedFiles) > 0 && s.matchAny(s.excludedFiles, file) } +func (s *fileScanner) isFileIncluded(file string) bool { + if len(s.includedFiles) == 0 { + return true + } + return s.matchAny(s.includedFiles, file) +} + // matchAny checks if the text matches any of the regular expressions func (s *fileScanner) matchAny(matchers []match.Matcher, text string) bool { for _, m := range matchers { diff --git a/filebeat/input/filestream/fswatch_test.go b/filebeat/input/filestream/fswatch_test.go index 535865a3f47..c2c01a53da2 100644 --- a/filebeat/input/filestream/fswatch_test.go +++ b/filebeat/input/filestream/fswatch_test.go @@ -52,6 +52,7 @@ func TestFileScanner(t *testing.T) { testCases := map[string]struct { paths []string excludedFiles []match.Matcher + includedFiles []match.Matcher symlinks bool expectedFiles []string }{ @@ -66,6 +67,13 @@ func TestFileScanner(t *testing.T) { }, expectedFiles: []string{includedFilePath}, }, + "only include included_files": { + paths: []string{excludedFilePath, includedFilePath}, + includedFiles: []match.Matcher{ + match.MustCompile(includedFileName), + }, + expectedFiles: []string{includedFilePath}, + }, "skip directories": { paths: []string{filepath.Join(tmpDir, directoryPath)}, expectedFiles: []string{}, @@ -78,6 +86,7 @@ func TestFileScanner(t *testing.T) { t.Run(name, func(t *testing.T) { cfg := fileScannerConfig{ ExcludedFiles: test.excludedFiles, + IncludedFiles: test.includedFiles, Symlinks: test.symlinks, RecursiveGlob: false, } diff --git a/filebeat/input/filestream/fswatch_test_non_windows.go b/filebeat/input/filestream/fswatch_test_non_windows.go index a07db4cbf18..8c3cbd03c71 100644 --- a/filebeat/input/filestream/fswatch_test_non_windows.go +++ 
b/filebeat/input/filestream/fswatch_test_non_windows.go @@ -24,6 +24,7 @@ import ( "io/ioutil" "os" "path/filepath" + "strconv" "testing" "github.com/stretchr/testify/assert" @@ -44,13 +45,14 @@ func TestFileScannerSymlinks(t *testing.T) { testCases := map[string]struct { paths []string excludedFiles []match.Matcher + includedFiles []match.Matcher symlinks bool expectedFiles []string }{ // covers test_input.py/test_skip_symlinks "skip symlinks": { paths: []string{ - filepath.Join(tmpDir, "symlink_to_included_file"), + filepath.Join(tmpDir, "symlink_to_0"), filepath.Join(tmpDir, "included_file"), }, symlinks: false, @@ -60,7 +62,7 @@ func TestFileScannerSymlinks(t *testing.T) { }, "return a file once if symlinks are enabled": { paths: []string{ - filepath.Join(tmpDir, "symlink_to_included_file"), + filepath.Join(tmpDir, "symlink_to_0"), filepath.Join(tmpDir, "included_file"), }, symlinks: true, @@ -68,14 +70,29 @@ func TestFileScannerSymlinks(t *testing.T) { mustAbsPath(filepath.Join(tmpDir, "included_file")), }, }, + "do not return symlink if original file is not allowed": { + paths: []string{ + filepath.Join(tmpDir, "symlink_to_1"), + filepath.Join(tmpDir, "included_file"), + }, + excludedFiles: []match.Matcher{ + match.MustCompile("original_" + excludedFileName), + }, + symlinks: true, + expectedFiles: []string{ + mustAbsPath(filepath.Join(tmpDir, "included_file")), + }, + }, } - err := os.Symlink( - mustAbsPath(filepath.Join(tmpDir, "included_file")), - mustAbsPath(filepath.Join(tmpDir, "symlink_to_included_file")), - ) - if err != nil { - t.Fatal(err) + for i, filename := range []string{"included_file", "excluded_file"} { + err := os.Symlink( + mustAbsPath(filepath.Join(tmpDir, "original_"+filename)), + mustAbsPath(filepath.Join(tmpDir, "symlink_to_"+strconv.Itoa(i))), + ) + if err != nil { + t.Fatal(err) + } } for name, test := range testCases { @@ -84,6 +101,7 @@ func TestFileScannerSymlinks(t *testing.T) { t.Run(name, func(t *testing.T) { cfg := 
fileScannerConfig{ ExcludedFiles: test.excludedFiles, + IncludedFiles: test.includedFiles, Symlinks: true, RecursiveGlob: false, } @@ -150,3 +168,11 @@ func TestFileWatcherRenamedFile(t *testing.T) { assert.Equal(t, testPath, evt.OldPath) assert.Equal(t, renamedPath, evt.NewPath) } + +func mustAbsPath(filename string) string { + abspath, err := filepath.Abs(filename) + if err != nil { + panic(err) + } + return abspath +} diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml index 137c518a333..75a10511b47 100644 --- a/x-pack/filebeat/filebeat.reference.yml +++ b/x-pack/filebeat/filebeat.reference.yml @@ -2692,6 +2692,10 @@ filebeat.inputs: # are matching any regular expression from the list. By default, no files are dropped. #prospector.scanner.exclude_files: ['.gz$'] + # Include files. A list of regular expressions to match. Filebeat keeps only the files that + # match any regular expression from the list. By default, all files are included. + #prospector.scanner.include_files: ['/var/log/.*'] + # Expand "**" patterns into regular glob patterns. #prospector.scanner.recursive_glob: true