From 74b2e597e3cbc9d9e22752d8db3c3f367683ea7a Mon Sep 17 00:00:00 2001 From: Gaurav Singh <12825441+Grv-Singh@users.noreply.github.com> Date: Sun, 16 Jun 2024 19:54:03 +0530 Subject: [PATCH] Update index.html --- index.html | 496 ++++++++++++++++------------------------------------- 1 file changed, 149 insertions(+), 347 deletions(-) diff --git a/index.html b/index.html index df9cd24..38e3c99 100644 --- a/index.html +++ b/index.html @@ -1,360 +1,162 @@ - - - - - - - - - - - - - - - - - - - - - - - - Portfolio - Gaurav Singh - - - - - - - - - - - - - - - - - - -
-
- -

- -

Gaurav Singh

-

- Defiant, innovative and unrelenting by nature. Talks about advancements, sustainability and purpose driven business. A data engineer in Cleaning, Analytics, Storytelling, Visualization and a web specialist. -

-
-

🤝🏻 Let's connect

-

- - - - - - - - -

-
-
- -
-
-

😍 Like what you see ?

- - -
- -
-
-
-

-
-
-

👋 Hi! there,

- - + # Switch to the new window + WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2)) + windows = driver.window_handles + driver.switch_to.window(windows[1]) -

🛠 Tools of trade

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-

-
-

🍃 Biodiversity scavenged

-
- - + # Scrape data from the popup + soup = BeautifulSoup(driver.page_source, 'html.parser') - -
- -

📐 Math & compute

-

+ # Replace
tags with newline characters + for br in soup.find_all("br"): + br.replace_with("\n") -

⌨ Clickety Clack

-

+ text_elements = soup.find_all('p') -

📈 Work activity

-

- -

⌚ Last month

-
-

⏳ Last year

-
+ # Directory and file setup + parent_folder = 'Bare DQE Bot' + job_folder = os.path.join(parent_folder, job_id) + if not os.path.exists(job_folder): + os.makedirs(job_folder) - -

-

🧑‍🎨 Art Gallery

- -

-

-

+ unwanted_text = "Note – For data quality reasons, please upload your photos directly from your computer, mobile phone or tablet. Do not upload them (or send from your mobile to your computer) via any other website, social network, or application of the sort of WhatsApp, Viber, Messenger or similar. Please do not modify your photos or their properties in any way. If you do so, your report will be excluded." -

🍽️ Cooking

-

- -

-

+ with open(os.path.join(job_folder, 'Notes.txt'), 'w', encoding='utf-8') as file: + for element in text_elements: + text = element.get_text() # Get text with replaced
tags + if text.strip() != unwanted_text: + file.write(text.strip() + '\n\n') # Adding two new lines for clear separation -

🎧 Spotify Playing

-

+ # Close the popup and switch back to the main window + driver.close() + driver.switch_to.window(windows[0]) -

- 📷 Photography -

-

-

-

# --- Reconstructed from a corrupted patch hunk of a Selenium scraping script
# --- for BARE International "ShopReview" job reports.  Pieces the hunk lost
# --- are marked with NOTE(review)/TODO rather than guessed at.

# Continue with any other scraping on the main page.
# NOTE(review): `soup` and `driver` are created earlier in the script, in the
# portion of the file outside this hunk.
paragraphs = soup.find_all('p')
for i, p in enumerate(paragraphs):
    print(f"Paragraph {i}: {p.text}")

# Close the browser used by the top-level scraping section.
driver.quit()


def extract_report_data(driver):
    """Scrape question/answer pairs from the admin report form.

    Walks the rows of the ``admin-question-form`` table, stopping when the
    "Bare - Code of Conduct" section is reached, and records one entry per
    answered question.

    Args:
        driver: Selenium WebDriver currently on the report page.

    Returns:
        dict: Mapping of question text -> answer text.  Multiple selected
        checkboxes are joined with " and "; an empty text input on a
        normally coloured row is recorded as "Not Answered".
    """
    report_data = {}
    table = driver.find_element(By.CLASS_NAME, "admin-question-form")
    for row in table.find_elements(By.CSS_SELECTOR, "tr"):
        # Everything from the Code of Conduct section onward is not report data.
        if "Bare - Code of Conduct" in row.text:
            break
        cells = row.find_elements(By.CSS_SELECTOR, "td")
        if not cells:
            continue
        question = cells[0].text.strip()
        inputs = row.find_elements(
            By.CSS_SELECTOR,
            "input[type='text'], input[type='radio'], input[type='checkbox'], "
            "textarea, select",
        )
        answers = []
        for input_elem in inputs:
            input_type = input_elem.get_attribute('type')
            if input_elem.tag_name == 'textarea' or input_type == 'text':
                value = input_elem.get_attribute('value').strip()
                # Grey (rgb(128,128,128)) rows are intentionally blank; only
                # flag empties on normally coloured rows as unanswered.
                row_style = input_elem.find_element(
                    By.XPATH, './ancestor::tr').get_attribute('style')
                if value == '' and 'background-color: rgb(128, 128, 128);' not in row_style:
                    answers.append('Not Answered')
                else:
                    answers.append(value)
            elif input_elem.tag_name == 'select':
                answers.append(
                    input_elem.find_element(
                        By.CSS_SELECTOR, 'option:checked').text.strip())
            elif input_type in ('radio', 'checkbox'):
                if input_elem.is_selected():
                    # The visible label is the text node that immediately
                    # follows the input element.
                    label = driver.execute_script(
                        "return arguments[0].nextSibling.textContent;",
                        input_elem)
                    answers.append(label.strip())
        # Join answers with 'and' if more than one checkbox is selected.
        answer = ' and '.join(filter(None, answers))
        if answer:  # only record questions that actually have an answer
            report_data[question] = answer
    return report_data


def data_processing(html_content, job_folder):
    """Parse the key/value summary cell of a report page and export it.

    Finds the ``<td bgcolor="#f5f5f5">`` cell, converts its ``<br>``-separated
    content into "key: value" lines, and writes the pairs to
    ``data_processed.csv`` inside *job_folder*.

    Args:
        html_content: Raw HTML of the report page.
        job_folder: Existing directory the CSV is written into.

    Returns:
        dict: The extracted key/value pairs (empty if the cell is missing).
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    data_dict = {}

    target_td = soup.find('td', {'bgcolor': "#f5f5f5"})
    if target_td is None:
        # Robustness fix: the original raised AttributeError when the summary
        # cell was absent; treat that case as "no data" instead.
        return data_dict

    # <br> tags become newlines so each "key: value" pair sits on its own line.
    for br in target_td.find_all("br"):
        br.replace_with("\n")
    text_content = target_td.get_text(separator="\n")

    unwanted_keys = ["00", "Kindly note the following scenario to be followed"]
    for line in text_content.split('\n'):
        if ':' in line:
            key, value = line.split(':', 1)  # split only on the first colon
            key = key.strip()
            if key not in unwanted_keys:
                data_dict[key] = value.strip()

    # Export data to CSV in the job folder.
    csv_file_path = os.path.join(job_folder, 'data_processed.csv')
    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Key', 'Value'])
        for key, value in data_dict.items():
            writer.writerow([key, value])

    return data_dict


def download_images(driver, job_folder):
    """Collect the CDN photo links on the current report page.

    NOTE(review): the corrupted hunk truncated this function — the code that
    actually downloads each ``image_url`` into *job_folder* (and the EXIF
    date/time check the flag below supports) is missing and must be restored
    from version history.
    """
    # Parse the live page source.
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Anchor tags whose href points at the report-photo CDN.
    image_links = soup.find_all(
        'a', href=lambda x: x and x.startswith('https://cdn.sassiex.com'))

    exif_date_time_found = False  # tracks whether an EXIF date/time was found

    for link in image_links:
        image_url = link['href']
        # TODO(review): download image_url into job_folder here (lost hunk).


def main():
    """Log in, open the target job report, and run the scraping steps.

    Fix: the corrupted hunk had fused this function's body into the loop of
    ``download_images`` while ``main`` itself — called by the ``__main__``
    guard below — was left undefined.  The visible statements are restored
    here under their original comments.

    NOTE(review): relies on the module-level ``driver`` created earlier in
    the script; the hunk lost this function's original start (including any
    ``driver.get`` of the login page) — confirm against history.
    """
    # Input the username and password using updated methods.
    # SECURITY: credentials are hard-coded in source.  Rotate this password
    # and load both values from environment variables / a secrets store.
    driver.find_element(By.NAME, 'login').send_keys("GSingh")
    driver.find_element(By.NAME, 'password').send_keys("Henghyan00!")
    driver.find_element(By.ID, 'login-button').click()

    # Navigate to the target page.
    job_id = "11689560"
    url = f"https://www.apollo.bareinternational.com/admin/ShopReview-Right.php?JobID={job_id}"
    driver.get(url)

    # Handle sticky notes (helper defined earlier in this file).
    handle_sticky_notes(driver, job_id)

    # Close the browser.
    driver.quit()


if __name__ == "__main__":
    main()
-

🌐💻 Volunteer Computing

-

- -

- -

-
-
-
-

-
- -
- - - - - -