Skip to content

Commit

Permalink
Searching files as they are recursed to, rather than enumerating file…
Browse files Browse the repository at this point in the history
…s first. Should produce efficiency improvements. (per recommendations from novaha)
  • Loading branch information
ttufts committed Nov 2, 2015
1 parent 80c076c commit e7dddcf
Showing 1 changed file with 13 additions and 46 deletions.
59 changes: 13 additions & 46 deletions uberfind.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,38 +19,6 @@
"""


# Function to recursively collect the path of every file below a directory
def listAllFiles(path):
    """Return a list with the path of every file found beneath *path*.

    The tree is traversed with ``os.walk``; each file name is joined to the
    directory it was found in, so the returned entries carry the same prefix
    as *path* itself.
    """
    collected = []

    # os.walk yields one (directory, subdirectories, files) triple per folder
    for folder, _subfolders, names in os.walk(path):
        # Add every file of this folder in one go, keeping the walk order
        collected.extend(os.path.join(folder, name) for name in names)

    return collected


# Function to keep only the file names whose extension is in a given collection
def filterFileTypes(filelist, extensions):
    """Return the entries of *filelist* whose extension appears in *extensions*.

    The extension is taken with ``os.path.splitext`` (so it includes the
    leading dot) and lower-cased before the membership test. The entries of
    *extensions* are used as given, so they are expected to be lower-case and
    dot-prefixed, e.g. ``".txt"``.
    """

    def has_wanted_extension(candidate):
        # Only the file's own extension is normalised to lowercase.
        suffix = os.path.splitext(candidate)[-1].lower()
        return suffix in extensions

    # Preserve the input order of the matching entries.
    return [candidate for candidate in filelist if has_wanted_extension(candidate)]


# Function to search for a list of keywords in an input file and write the results to an output file, using Regex
def searcherRegex(outfile, infile, lookup, n, v):

Expand Down Expand Up @@ -153,24 +121,23 @@ def main():
# Counter for the number of files containing a keyword
count = 0

# Get a list of all files in the path recursively
files = listAllFiles(args.path)
matched_files = 0

for root, dirs, files in os.walk(args.path):
for filename in files:
fname, ext = os.path.splitext(filename)
if ext not in args.extensions:
continue

# Enable file extensions filter or search all files
if args.all_files is False:
files_to_search = filterFileTypes(files, args.extensions)
else:
files_to_search = files
matched_files += 1

if len(files_to_search) == 0:
print "Unable to find any files matching the provided extensions: {}".format(", ".join(str(ext).replace('.', '') for ext in args.extensions))
return
f = os.path.join(root, filename)

# Perform search
for f in files_to_search:
count += searcherRegex(r, f, args.keywords, args.chars, args.verbose)
# Perform search
# for f in files_to_search:
count += searcherRegex(r, f, args.keywords, args.chars, args.verbose)

print "Searched through", len(files_to_search), 'files.'
print "Searched through {} files.".format(matched_files)
print "Found keyword in", count, 'files.'
print "For more details, check the results file:", os.path.realpath(args.results) + "\n"

Expand Down

0 comments on commit e7dddcf

Please sign in to comment.