-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathneetcode_scraper.py
60 lines (50 loc) · 1.98 KB
/
neetcode_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from selenium.webdriver.common.by import By
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import config
import logging
from termcolor import colored
def scrape_neetcode(*, headless=False):
    """Scrape the NeetCode 150 problem list from neetcode.io/practice.

    Opens the practice page in a webdriver obtained from
    ``config.get_webdriver``, clicks the "NeetCode 150" tab and collects
    every anchor matching ``a.table-text.text-color``.

    Args:
        headless: Forwarded to ``config.get_webdriver``. Defaults to
            ``False``, the value that used to be hard-coded here.

    Returns:
        dict: ``{problem_link: problem_name}``. Links (with any trailing
        ``/`` removed) are the keys because problem names are not unique:
        the same name can appear with different casing. The dict is empty
        or partial when the scrape was interrupted by a
        ``NoSuchElementException`` or ``TimeoutException``; those two are
        logged rather than raised. Any other exception propagates after
        the driver has been shut down.
    """
    driver = config.get_webdriver(headless=headless)
    url = "https://neetcode.io/practice"

    # {problem_link: problem_name}
    problem_dict = {}

    print(colored("Scraping NeetCode...", "blue"))

    try:
        # Navigation lives inside the try so that a failed page load still
        # reaches the finally clause and the browser is not left running.
        driver.get(url)

        # Click the NeetCode 150 tab. Adjacent literals are used instead of
        # a backslash continuation inside the string so the XPath does not
        # silently absorb the source indentation.
        nc_150_xpath_locator = (
            "//*[contains(text(), '🚀') and "
            "contains(text(), 'NeetCode 150')]"
        )
        nc_150_tab = driver.find_element(By.XPATH, nc_150_xpath_locator)
        nc_150_tab.click()

        # NOTE(review): there is no explicit wait after the click; an
        # earlier WebDriverWait on the table element was disabled here.
        # Confirm the rows are already in the DOM at this point.
        for anchor in driver.find_elements(By.CSS_SELECTOR,
                                           "a.table-text.text-color"):
            problem_link = anchor.get_attribute("href")
            if not problem_link:
                # get_attribute returns None when the attribute is absent;
                # an anchor without a link cannot be keyed, so skip it.
                continue
            problem_name = anchor.get_attribute("innerText").strip()
            problem_dict[problem_link.rstrip("/")] = problem_name
    except NoSuchElementException:
        logging.error(colored("Element not found.", "red"))
    except TimeoutException:
        logging.error(colored("Request timed out.", "red"))
    else:
        # Only claim success when no handled failure occurred above.
        print(
            colored(
                f"Successfully scraped {len(problem_dict)} NeetCode problems.",
                "green",
            )
        )
    finally:
        driver.quit()

    return problem_dict
if __name__ == "__main__":
    # Manual run: scrape once and dump every entry as "name: link".
    scraped_problems = scrape_neetcode()
    for problem_link, problem_name in scraped_problems.items():
        print(f"{problem_name}: {problem_link}")