From 9363cf1fc4e79d0c61f7cc0d573b1dcb959a399f Mon Sep 17 00:00:00 2001 From: Anirudh Dilli Date: Sun, 21 Jul 2024 21:18:01 +0530 Subject: [PATCH 1/5] Refactor spider_all function for improved efficiency and readability Refactored the spider_all function to enhance performance and code clarity. - Replaced lists with sets for queries_made and new_queries to allow O(1) average time complexity for membership checks and automatic deduplication. - Used set operations to simplify the logic for collecting and processing unique, spiderable queries. - Ensured new queries are efficiently filtered and processed without reprocessing already made queries. - Improved overall readability and maintainability of the function. This change optimizes the function by leveraging set operations, making the code more concise and efficient. --- requirements.txt | 6 ++++++ tests/__init__.py | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ef85a9d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +pandas +colorama +platformdirs +sherlock_project +phonenumbers +tldextract \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py index e69de29..0dd7071 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,17 @@ +import os +import sys +import unittest + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + + +def load_tests(loader, tests, pattern): + """Test loader function for unittest discovery.""" + test_suite = unittest.TestSuite() + for all_test_suite in unittest.defaultTestLoader.discover(os.path.dirname(__file__), pattern='test_*.py'): + for test_suite in all_test_suite: + test_suite.addTests(test_suite) + return test_suite + +if __name__ == '__main__': + unittest.TextTestRunner(verbosity=2).run(load_tests()) \ No newline at end of file From 96ba5360718482ce12cf05b45fe0e56ce067e282 Mon Sep 17 00:00:00 2001 From: Anirudh Dilli Date: Sun, 21 Jul 2024 21:27:27 +0530 Subject: [PATCH 2/5] updated handler.py --- requirements.txt | 6 ------ src/oculus/handler.py | 31 ++++++++++++++++++------------- 2 files changed, 18 insertions(+), 19 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index ef85a9d..0000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -pandas -colorama -platformdirs -sherlock_project -phonenumbers -tldextract \ No newline at end of file diff --git a/src/oculus/handler.py b/src/oculus/handler.py index 632c820..5d2d742 100644 --- a/src/oculus/handler.py +++ b/src/oculus/handler.py @@ -68,20 +68,25 @@ def search_all(self, query:str, no_deduplicate:bool=False): if not no_deduplicate: self.collector.deduplicate() - def spider_all(self, query:str, depth:int=1, no_deduplicate:bool=False): - queries_made:List[str] = [f'{query}'] + def spider_all(self, query: str, depth: int = 1, no_deduplicate: bool = False): + queries_made: set = {query} self.search_all(query=query) - for iteration in range(depth): - new_queries:List[str] = [] - new_queries.extend(self.collector.get_unique_usernames(spiderable_only=True)) - new_queries.extend(self.collector.get_unique_emails(spiderable_only=True)) - new_queries.extend(self.collector.get_unique_phones(spiderable_only=True)) - new_queries.extend(self.collector.get_unique_fullnames(spiderable_only=True)) - new_queries = list(set(new_queries)) # deduplication (for some reason set .update was problematic) + + for i in range(depth): + new_queries: set = set() + + new_queries.update(self.collector.get_unique_usernames(spiderable_only=True)) + new_queries.update(self.collector.get_unique_emails(spiderable_only=True)) + new_queries.update(self.collector.get_unique_phones(spiderable_only=True)) + new_queries.update(self.collector.get_unique_fullnames(spiderable_only=True)) + + new_queries -= queries_made + queries_made.update(new_queries) + for new_query in new_queries: - if new_query in queries_made: - continue - if loglevel >= LogLevel.SUCCESS_ONLY.value: + if loglevel >= loglevel.SUCCESS_ONLY.value: print(f'{Fore.BLUE}{Style.BRIGHT}[{Fore.RESET}Spider{Fore.BLUE}{Style.BRIGHT}]{Fore.RESET}{Style.RESET_ALL} {new_query}') - queries_made.append(new_query) self.search_all(query=new_query) + + if not no_deduplicate: + self.collector.deduplicate() \ No newline at end of file From b84c13bc6f1811a038f76aff9e0b5127c2536d7c Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sun, 21 Jul 2024 16:48:12 -0400 Subject: [PATCH 3/5] chore: remove unittest init --- tests/__init__.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index 0dd7071..8b13789 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,17 +1 @@ -import os -import sys -import unittest -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) - - -def load_tests(loader, tests, pattern): - """Test loader function for unittest discovery.""" - test_suite = unittest.TestSuite() - for all_test_suite in unittest.defaultTestLoader.discover(os.path.dirname(__file__), pattern='test_*.py'): - for test_suite in all_test_suite: - test_suite.addTests(test_suite) - return test_suite - -if __name__ == '__main__': - unittest.TextTestRunner(verbosity=2).run(load_tests()) \ No newline at end of file From 2432267eaf8dc5d4912455694df7d7c4128fd792 Mon Sep 17 00:00:00 2001 From: VainXploits <133690846+VainXploits@users.noreply.github.com> Date: Mon, 22 Jul 2024 02:42:43 +0530 Subject: [PATCH 4/5] Update src/oculus/handler.py loglevel updated to LogLevel... Co-authored-by: Paul Pfeister --- src/oculus/handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oculus/handler.py b/src/oculus/handler.py index 5d2d742..716a01d 100644 --- a/src/oculus/handler.py +++ b/src/oculus/handler.py @@ -84,7 +84,7 @@ def spider_all(self, query: str, depth: int = 1, no_deduplicate: bool = False): queries_made.update(new_queries) for new_query in new_queries: - if loglevel >= loglevel.SUCCESS_ONLY.value: + if loglevel >= LogLevel.SUCCESS_ONLY.value: print(f'{Fore.BLUE}{Style.BRIGHT}[{Fore.RESET}Spider{Fore.BLUE}{Style.BRIGHT}]{Fore.RESET}{Style.RESET_ALL} {new_query}') self.search_all(query=new_query) From 477baca775630914ac2c5b57358f88afb3f4bb06 Mon Sep 17 00:00:00 2001 From: VainXploits <133690846+VainXploits@users.noreply.github.com> Date: Mon, 22 Jul 2024 02:42:56 +0530 Subject: [PATCH 5/5] Update src/oculus/handler.py Co-authored-by: Paul Pfeister --- src/oculus/handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oculus/handler.py b/src/oculus/handler.py index 716a01d..e80208c 100644 --- a/src/oculus/handler.py +++ b/src/oculus/handler.py @@ -89,4 +89,4 @@ def spider_all(self, query: str, depth: int = 1, no_deduplicate: bool = False): self.search_all(query=new_query) if not no_deduplicate: - self.collector.deduplicate() \ No newline at end of file + self.collector.deduplicate()