-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
1,413 additions
and
1,427 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
,Số hiệu chuyến bay,Khởi hành,Đến,Airbus,SkyBOSS_Business,SkyBOSS,Deluxe,Eco,Thuế_phí | ||
0,VJ640,2024-03-07 22:35:00,2024-03-07 23:50:00,Airbus A321,,3992000.0000000005,2738000.0,1968000.0,593.0 | ||
1,VJ622,2024-03-08 07:25:00,2024-03-08 08:45:00,Airbus A320,,3992000.0000000005,2892000.0,2122000.0,593.0 | ||
2,VJ626,2024-03-08 11:05:00,2024-03-08 12:25:00,Airbus A320,,3992000.0000000005,3024000.0,2254000.0,593.0 | ||
3,VJ630,2024-03-08 14:40:00,2024-03-08 16:00:00,Airbus A321,,3992000.0000000005,2892000.0,2122000.0,593.0 | ||
4,VJ632,2024-03-08 17:20:00,2024-03-08 18:35:00,Airbus A320,,3992000.0000000005,3145000.0,2375000.0,593.0 | ||
5,VJ638,2023-03-08 20:55:00,2023-03-08 22:15:00,Airbus A320,,,,,593.0 | ||
6,VJ640,2024-03-08 22:35:00,2024-03-08 23:50:00,Airbus A321,,3992000.0000000005,2595000.0,1825000.0,593.0 | ||
7,VJ622,2024-03-09 07:25:00,2024-03-09 08:45:00,Airbus A320,,3992000.0000000005,3112000.0,2342000.0,593.0 | ||
8,VJ626,2023-03-09 11:05:00,2023-03-09 12:25:00,Airbus A320,,,,,593.0 | ||
9,VJ630,2024-03-09 14:40:00,2024-03-09 16:00:00,Airbus A321,,3992000.0000000005,2892000.0,2122000.0,593.0 | ||
10,VJ632,2024-03-09 17:20:00,2024-03-09 18:35:00,Airbus A320,,3992000.0000000005,2342000.0,1572000.0,593.0 | ||
11,VJ634,2024-03-09 19:25:00,2024-03-09 20:45:00,Airbus A321,,3992000.0000000005,2177000.0,1407000.0,593.0 | ||
12,VJ638,2024-03-09 20:15:00,2024-03-09 21:30:00,Airbus A321,,3992000.0000000005,2034000.0,1264000.0,593.0 | ||
13,VJ640,2024-03-09 22:35:00,2024-03-09 23:55:00,Airbus A321,,3992000.0000000005,2034000.0,1264000.0,593.0 | ||
14,VJ622,2024-03-10 07:25:00,2024-03-10 08:45:00,Airbus A320,,3992000.0000000005,2342000.0,1572000.0,593.0 | ||
15,VJ626,2024-03-10 11:05:00,2024-03-10 12:25:00,Airbus A321,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
16,VJ630,2024-03-10 14:40:00,2024-03-10 16:00:00,Airbus A321,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
17,VJ632,2024-03-10 17:10:00,2024-03-10 18:30:00,Airbus A320,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
18,VJ634,2024-03-10 19:25:00,2024-03-10 20:45:00,Airbus A321,,3992000.0000000005,2034000.0,1264000.0,593.0 | ||
19,VJ638,2024-03-10 20:10:00,2024-03-10 21:30:00,Airbus A320,,3992000.0000000005,2177000.0,1407000.0,593.0 | ||
20,VJ640,2024-03-10 22:35:00,2024-03-10 23:55:00,Airbus A321,,3992000.0000000005,2034000.0,1264000.0,593.0 | ||
21,VJ622,2024-03-11 07:25:00,2024-03-11 08:45:00,Airbus A320,,3992000.0000000005,2595000.0,1825000.0,593.0 | ||
22,VJ626,2024-03-11 11:05:00,2024-03-11 12:25:00,Airbus A320,,3992000.0000000005,2595000.0,1825000.0,593.0 | ||
23,VJ630,2024-03-11 14:40:00,2024-03-11 16:00:00,Airbus A321,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
24,VJ632,2024-03-11 17:20:00,2024-03-11 18:35:00,Airbus A320,,3992000.0000000005,2342000.0,1572000.0,593.0 | ||
25,VJ634,2024-03-11 19:25:00,2024-03-11 20:45:00,Airbus A320,,3992000.0000000005,2034000.0,1264000.0,593.0 | ||
26,VJ636,2024-03-11 20:15:00,2024-03-11 21:35:00,Airbus A320,,3992000.0000000005,2034000.0,1264000.0,593.0 | ||
27,VJ622,2024-03-12 07:25:00,2024-03-12 08:45:00,Airbus A320,,3992000.0000000005,2342000.0,1572000.0,593.0 | ||
28,VJ626,2024-03-12 11:05:00,2024-03-12 12:25:00,Airbus A320,,3992000.0000000005,2595000.0,1825000.0,593.0 | ||
29,VJ630,2024-03-12 14:40:00,2024-03-12 16:00:00,Airbus A321,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
30,VJ632,2024-03-12 17:10:00,2024-03-12 18:30:00,Airbus A320,,3992000.0000000005,2177000.0,1407000.0,593.0 | ||
31,VJ634,2024-03-12 19:25:00,2024-03-12 20:45:00,Airbus A321,,3992000.0000000005,2034000.0,1264000.0,593.0 | ||
32,VJ636,2024-03-12 20:15:00,2024-03-12 21:35:00,Airbus A321,,3992000.0000000005,1902000.0,1132000.0,593.0 | ||
33,VJ622,2024-03-13 07:25:00,2024-03-13 08:45:00,Airbus A320,,3992000.0000000005,2342000.0,1572000.0,593.0 | ||
34,VJ630,2024-03-13 14:40:00,2024-03-13 16:00:00,Airbus A321,,3992000.0000000005,2892000.0,2122000.0,593.0 | ||
35,VJ632,2024-03-13 16:55:00,2024-03-13 18:15:00,Airbus A320,,3992000.0000000005,2342000.0,1572000.0,593.0 | ||
36,VJ634,2024-03-13 19:25:00,2024-03-13 20:45:00,Airbus A321,,3992000.0000000005,2034000.0,1264000.0,593.0 | ||
37,VJ636,2024-03-13 20:15:00,2024-03-13 21:35:00,Airbus A320,,3992000.0000000005,2034000.0,1264000.0,593.0 | ||
38,VJ622,2024-03-14 07:25:00,2024-03-14 08:45:00,Airbus A320,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
39,VJ626,2024-03-14 11:05:00,2024-03-14 12:25:00,Airbus A320,,3992000.0000000005,2738000.0,1968000.0,593.0 | ||
40,VJ630,2024-03-14 14:40:00,2024-03-14 16:00:00,Airbus A321,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
41,VJ632,2024-03-14 17:20:00,2024-03-14 18:35:00,Airbus A320,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
42,VJ634,2024-03-14 19:25:00,2024-03-14 20:45:00,Airbus A320,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
43,VJ636,2024-03-14 20:15:00,2024-03-14 21:35:00,Airbus A321,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
44,VJ622,2024-03-15 07:25:00,2024-03-15 08:45:00,Airbus A320,,3992000.0000000005,2892000.0,2122000.0,593.0 | ||
45,VJ626,2024-03-15 11:05:00,2024-03-15 12:25:00,Airbus A320,,3992000.0000000005,2947000.0,2177000.0,593.0 | ||
46,VJ630,2024-03-15 14:40:00,2024-03-15 16:00:00,Airbus A321,,3992000.0000000005,2892000.0,2122000.0,593.0 | ||
47,VJ632,2024-03-15 16:55:00,2024-03-15 18:15:00,Airbus A320,,3992000.0000000005,3024000.0,2254000.0,593.0 | ||
48,VJ634,2024-03-15 19:25:00,2024-03-15 20:45:00,Airbus A321,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
49,VJ636,2024-03-15 21:10:00,2024-03-15 22:30:00,Airbus A320,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
50,VJ622,2024-03-16 07:25:00,2024-03-16 08:45:00,Airbus A320,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
51,VJ626,2024-03-16 11:05:00,2024-03-16 12:25:00,Airbus A320,,3992000.0000000005,2595000.0,1825000.0,593.0 | ||
52,VJ630,2024-03-16 14:40:00,2024-03-16 16:00:00,Airbus A321,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
53,VJ632,2024-03-16 17:20:00,2024-03-16 18:35:00,Airbus A320,,3992000.0000000005,2342000.0,1572000.0,593.0 | ||
54,VJ634,2024-03-16 19:25:00,2024-03-16 20:45:00,Airbus A321,,3992000.0000000005,2034000.0,1264000.0,593.0 | ||
55,VJ622,2024-03-17 07:25:00,2024-03-17 08:45:00,Airbus A320,,3992000.0000000005,2177000.0,1407000.0,593.0 | ||
56,VJ626,2024-03-17 11:05:00,2024-03-17 12:25:00,Airbus A321,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
57,VJ630,2024-03-17 14:40:00,2024-03-17 16:00:00,Airbus A321,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
58,VJ632,2024-03-17 17:10:00,2024-03-17 18:30:00,Airbus A320,,3992000.0000000005,2342000.0,1572000.0,593.0 | ||
59,VJ634,2024-03-17 19:25:00,2024-03-17 20:45:00,Airbus A321,,3992000.0000000005,2034000.0,1264000.0,593.0 | ||
60,VJ636,2024-03-17 20:15:00,2024-03-17 21:35:00,Airbus A321,,3992000.0000000005,1902000.0,1132000.0,593.0 | ||
61,VJ622,2024-03-18 07:25:00,2024-03-18 08:45:00,Airbus A320,,3992000.0000000005,3255000.0,2485000.0,593.0 | ||
62,VJ626,2024-03-18 11:05:00,2024-03-18 12:25:00,Airbus A320,,3992000.0000000005,2892000.0,2122000.0,593.0 | ||
63,VJ630,2024-03-18 14:40:00,2024-03-18 16:00:00,Airbus A321,,3992000.0000000005,2595000.0,1825000.0,593.0 | ||
64,VJ632,2024-03-18 17:20:00,2024-03-18 18:35:00,Airbus A320,,3992000.0000000005,2463000.0,1693000.0,593.0 | ||
65,VJ634,2024-03-18 19:25:00,2024-03-18 20:45:00,Airbus A321,,3992000.0000000005,2177000.0,1407000.0,593.0 | ||
66,VJ636,2024-03-18 20:15:00,2024-03-18 21:35:00,Airbus A320,,3992000.0000000005,2177000.0,1407000.0,593.0 |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
- Cập nhật ngày 20, 21/4/2023 update trường hợp các chuyến bay hết chỗ và lấy được toàn bộ muibox. | ||
- Cập nhật ngày 24/6/2023 update tắt quảng cáo mới (2 quảng cáo), accept cookie, thuế phí bị tính riêng. | ||
- Cập nhật ngày 25/6/2023 fix lỗi không lấy được ngày hết chỗ. | ||
- Cập nhật ngày 07/03/2024 fix lỗi tắt popup quảng cáo. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,260 @@ | ||
from selenium import webdriver | ||
from selenium.webdriver.common.keys import Keys | ||
from selenium.webdriver.common.by import By | ||
from selenium.webdriver.support import expected_conditions as EC | ||
from selenium.webdriver.common.action_chains import ActionChains | ||
from bs4 import BeautifulSoup | ||
import re # Regular expression | ||
from datetime import date, timedelta | ||
from time import sleep | ||
from selenium.webdriver.chrome.service import Service | ||
from webdriver_manager.chrome import ChromeDriverManager | ||
from selenium.webdriver.support.ui import WebDriverWait | ||
from selenium.common.exceptions import StaleElementReferenceException | ||
from selenium.common.exceptions import ElementNotInteractableException | ||
|
||
import pandas as pd | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
|
||
# Launch Chrome and maximize the window. webdriver_manager installs a
# ChromeDriver build for the service instead of relying on a local
# "chromedriver.exe".
chrome_service = Service(ChromeDriverManager().install())
browser = webdriver.Chrome(service=chrome_service)
browser.maximize_window()

# Default (implicit) wait of 10 seconds for element lookups.
browser.implicitly_wait(10)

# Open the Vietjet home page.
url = 'https://www.vietjetair.com'
browser.get(url)

# Explicit wait (10-second timeout) shared by the rest of the script.
wait = WebDriverWait(browser, 10)
|
||
# Dismiss the OneSignal notification prompt.
onesignal_cancel = (By.XPATH, '//*[@id="onesignal-slidedown-cancel-button"]')
wait.until(EC.element_to_be_clickable(onesignal_cancel)).click()

# Close the advertisement pop-ups: click every matched "close" icon button
# except the first one in the list.
close_buttons = wait.until(EC.presence_of_all_elements_located(
    (By.CSS_SELECTOR, 'button.MuiIconButton-root[aria-label="close"]')))
for close_button in close_buttons[1:]:
    close_button.click()

# Accept the cookie notice (clicks the first clickable button inside a dialog).
cookie_button = (By.XPATH, '//div[@role="dialog"]//button')
wait.until(EC.element_to_be_clickable(cookie_button)).click()
|
||
# Choose a one-way ticket.
wait.until(EC.element_to_be_clickable((
    By.XPATH, '//span[contains(text(),"Một chiều")]'))).click()

# Open the origin field (departure from Ho Chi Minh City).
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@class="MuiInputBase-input MuiOutlinedInput-input"]'))).click()

# Pick Tan Son Nhat airport as the origin.
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[contains(text(),"Sân bay Tân Sơn Nhất")]'))).click()

# Pick Da Nang airport as the destination. The first attempt can fail while
# the list is still updating, so retry once. Catch Exception rather than a
# bare except so Ctrl+C / SystemExit are not swallowed by the retry.
da_nang_option = (By.XPATH, '//*[contains(text(),"Sân bay Đà Nẵng")]')
try:
    wait.until(EC.element_to_be_clickable(da_nang_option)).click()
except Exception:
    wait.until(EC.element_to_be_clickable(da_nang_option)).click()
|
||
# Departure date: tomorrow, split into the pieces the date picker shows.
today = date.today()
tomorrow = today + timedelta(1)
tomorrow_year = str(tomorrow.year)
tomorrow_month = tomorrow.strftime('%m')  # zero-padded month number
tomorrow_day = str(tomorrow.day)          # day of month without padding

# Month caption text to look for in the calendar, e.g. "tháng 03 2024".
str_time = f'tháng {tomorrow_month} {tomorrow_year}'

# Click the span containing tomorrow's day number under that month caption.
day_cell_xpath = (
    f'//*[contains(text(),"{str_time}")]//following-sibling::div[2]'
    f'//descendant::span[contains(text(),"{tomorrow_day}")]'
)
wait.until(EC.presence_of_element_located((By.XPATH, day_cell_xpath))).click()
|
||
# Click the second element whose text contains "Tìm chuyến bay" (find flights).
submit = wait.until(EC.visibility_of_element_located((By.XPATH, '(//*[contains(text(), "Tìm chuyến bay")])[2]')))
try:
    submit.click()
except Exception:
    # Retry once if the first click fails. Exception (not a bare except) keeps
    # KeyboardInterrupt / SystemExit from being swallowed.
    submit.click()
|
||
from lxml import html | ||
# Number of days to collect data for.
nums_date = 12

# Parse the page as it is right now with BeautifulSoup; used further down to
# read the "slick-track" container.
current_page_html = browser.page_source
tree = BeautifulSoup(current_page_html, 'html.parser')
|
||
def find_by_xpath(element_source, xpath_expression):
    """Evaluate an XPath expression against an HTML string.

    Args:
        element_source: HTML markup to parse (e.g. ``browser.page_source``).
        xpath_expression: XPath to evaluate on the parsed document.

    Returns:
        Whatever ``lxml`` returns for the expression, or an empty list when
        parsing or evaluation fails.
    """
    try:
        return html.fromstring(element_source).xpath(xpath_expression)
    except Exception:
        # Best-effort lookup: any parse/XPath failure means "nothing found".
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return []
|
||
def check_seat(browser, rows, type, i):
    """Return the XPath of the fare text for one fare class of one flight row.

    The original looked up the "Hết chỗ" (sold out) marker with
    ``find_element`` outside the ``try``, so a missing marker raised instead
    of falling back to the regular price path. ``find_elements`` returns an
    empty list when nothing matches, which makes both branches reachable.

    Args:
        browser: Selenium WebDriver showing the results page.
        rows: XPath prefix that selects the flight rows.
        type: 1-based fare-class column (1, 2, 3 or 4). The name shadows the
            builtin but is kept for backward compatibility with callers.
        i: 0-based row index.

    Returns:
        XPath string selecting the text to read for that cell.
    """
    cell = rows + f'[{i+1}]/div/div/div[2]/div[{type}]'
    sold_out = browser.find_elements(By.XPATH, cell + '//p[contains(text(), "Hết chỗ")]')
    if sold_out:
        # Sold out: the text sits under any descendant <p> of the inner div.
        return cell + '/div//p/text()'
    # Seats available: the text is in the first <p> directly under the inner div.
    return cell + '/div/p[1]/text()'
|
||
def click_with_js(element):
    """Click *element* by running a JavaScript ``click()`` through the module-level browser."""
    js_click = "arguments[0].click();"
    browser.execute_script(js_click, element)
|
||
|
||
class display_to_be_flex(object):
    """Wait condition that succeeds once an element's computed ``display`` is ``'flex'``."""

    def __init__(self, locator):
        # Locator tuple handed to EC.presence_of_element_located.
        self.locator = locator

    def __call__(self, driver):
        """Return the located element when its display is 'flex', otherwise False."""
        try:
            found = EC.presence_of_element_located(self.locator)(driver)
            display = driver.execute_script(
                "return getComputedStyle(arguments[0]).display;", found)
        except StaleElementReferenceException:
            # The element was replaced between lookup and inspection; report
            # "not yet" so the caller can poll again.
            return False
        return found if display == "flex" else False
|
||
|
||
# Accumulates one dict per scraped flight across all calls to get_rows().
data = []
# XPath selecting the flight rows on the results page.
root = '//*[@id="root"]/div[1]/div[2]/div/div/div/div[1]/div/div/div[4]/div[2]/div[1]/div'


def _parse_page(browser):
    """Parse the browser's current DOM once so several XPath queries can share it."""
    return html.fromstring(browser.page_source)


def get_rows(browser, addr):
    """Scrape every flight row of the currently selected day into ``data``.

    For each row the flight number and the four fare texts are read first.
    Unless all four fares read "Hết chỗ" (sold out), the row is expanded with
    a JavaScript click so departure/arrival can be read from the detail
    panel; otherwise they are built from the row's time texts plus the day
    label found under ``addr``.

    The page is parsed once before and once after expanding a row rather
    than once per field.

    Args:
        browser: Selenium WebDriver showing the results page.
        addr: XPath of the selected day tab; its label supplies the date for
            sold-out rows.

    Returns:
        The module-level ``data`` list (cumulative over all calls).

    Raises:
        IndexError: if an expected text node is missing from the page.
    """
    sleep(1)

    # Wait until the flight list has loaded and is laid out (display: flex).
    wait.until(display_to_be_flex((By.XPATH, '//*[@id="root"]/div[1]/div[2]/div/div/div/div[1]/div/div/div[4]/div[2]/div[2]')))
    wait.until(EC.visibility_of_all_elements_located((By.XPATH, root)))

    # Scroll to the bottom so every row element is rendered, then back to the
    # top before crawling.
    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    sleep(1)
    browser.execute_script("window.scrollTo(0, 0)")

    n_rows = len(_parse_page(browser).xpath(root))
    print(n_rows)
    for i in range(n_rows):
        print(i, end=' ')
        row = root + f'[{i+1}]'
        detail = row + '/div/div[2]/div/div/div/div/div/div/div[2]/div'
        start_xpath = detail + '/div[1]/div/div[2]/p[1][1]'
        end_xpath = detail + '/div[2]/div/div[2]/p[1][1]'

        # Snapshot taken before the row is expanded.
        collapsed = _parse_page(browser)
        ID = collapsed.xpath(row + '/div/div/div[1]/div[1]/span/span/text()')[0]
        # "div//p//text()" (not "div/p[1]/text()") so the sold-out label is
        # picked up as well as a price.
        skyBoss_business, skyBoss, Deluxe, Eco = (
            collapsed.xpath(row + f'/div/div/div[2]/div[{col}]/div//p//text()')[0]
            for col in range(1, 5)
        )
        # Only expand the row when at least one fare class is not sold out.
        click = not (skyBoss_business == skyBoss == Deluxe == Eco == 'Hết chỗ')

        row_elem = wait.until(EC.element_to_be_clickable((By.XPATH, row + '/div/div[1]/div[1]')))
        if click:
            click_with_js(row_elem)
            sleep(0.01)

        print('click = ', click)
        if click:
            # Scroll to the detail panel that the click opened.
            browser.execute_script('return arguments[0].scrollIntoView(true);',
                                   wait.until(EC.element_to_be_clickable((By.XPATH, row + '/div/div[2]'))))
            # The panel content is read from a single snapshot below, so wait
            # until the departure text node's element is actually present.
            wait.until(EC.presence_of_element_located((By.XPATH, start_xpath)))

        # Snapshot taken after the (optional) expansion.
        expanded = _parse_page(browser)
        hours = expanded.xpath(row + '/div/div/div[1]/div[2]/text()')
        day_label = expanded.xpath(addr + '/div/div/div/p[2]/text()')[0]

        if click:
            start_ = expanded.xpath(start_xpath + '/text()')[0]
            end_ = expanded.xpath(end_xpath + '/text()')[0]
        else:
            # Sold-out rows are never expanded: combine the row's time texts
            # (dropping one separator character) with the day label.
            start_ = hours[0][:-1] + ' ' + day_label
            end_ = hours[1][1:] + ' ' + day_label
        airbus = ''.join(expanded.xpath(row + '/div/div/div[1]/div[3]/span/text()'))

        data.append({'Số hiệu chuyến bay': ID, 'Khởi hành': start_, 'Đến': end_, 'Airbus': airbus,
                     'SkyBOSS_Business': skyBoss_business, 'SkyBOSS': skyBoss, 'Deluxe': Deluxe, 'Eco': Eco})
        if click:
            # Click the row again once its data is stored (presumably
            # collapses the panel — confirm against the site).
            click_with_js(row_elem)
    print('Đã load xong...\n')
    # Jump back to the top of the page so the next day tab can be clicked.
    browser.find_element(By.TAG_NAME, "body").send_keys(Keys.CONTROL + Keys.HOME)
    return data
|
||
# Container whose children carry a "data-index" attribute, one per day tab.
slick_track = tree.find("div", class_="slick-track")
count = 0
rows = '//*[@id="root"]/div[1]/div[2]/div/div/div/div[1]/div/div/div[4]/div[2]/div[1]/div'

childrens = list(slick_track.children)[:nums_date]

# When the page reports no flights for the current selection, skip the first
# day tab and take the following nums_date tabs instead.
if find_by_xpath(browser.page_source, '//h5[contains(text(), "Không tìm thấy chuyến bay nào cho lựa chọn của bạn. Quay lại để chọn ngày khác.")]'):
    childrens = list(slick_track.children)[1:nums_date+1]

for child in childrens:
    sleep(1)
    addr = f'//*[@data-index="{int(child.attrs["data-index"])}"]'
    next_day = wait.until(EC.element_to_be_clickable((By.XPATH, addr)))
    try:
        next_day.click()
    except Exception:
        # Retry once; Exception (not a bare except) keeps Ctrl+C working.
        next_day.click()

    # get_rows() waits for the day's flights to load before reading them and
    # returns the cumulative list of scraped rows. Counting from that list
    # avoids sampling the row count before the new day has finished loading.
    count = len(get_rows(browser, addr))
print('Tổng số chuyến bay: ', count)
|
||
######################################################################################################################################### | ||
import re | ||
import os | ||
|
||
flight = pd.DataFrame(data)
ticket_class = ['SkyBOSS_Business', 'SkyBOSS', 'Deluxe', 'Eco']


def _fare_to_number_text(text):
    """Map a scraped fare to NaN (sold out) or a float-parsable string.

    Thousands separators are removed and 'e3' is appended, so the parsed
    float is the displayed number multiplied by 1000.
    """
    if text == 'Hết chỗ':
        return np.nan
    return text.replace(',', '') + 'e3'


def _day_month_to_date(match):
    """Turn a "<day> tháng <month>" match into dd/mm/yyyy.

    The scraped label carries no year. The script only looks at the days
    following ``today``, so a month earlier than today's month means the
    date has rolled over into next year. (The year used to be hard-coded as
    2023, which mis-dated sold-out flights scraped in any other year.)
    """
    day, month = int(match.group(1)), int(match.group(2))
    year = today.year + 1 if month < today.month else today.year
    return f"{day:02}/{month:02}/{year}"


for fare_column in ticket_class:
    flight[fare_column] = flight[fare_column].apply(_fare_to_number_text).astype(float)

pattern = r'(\d{1,2})\s+tháng\s+(\d{1,2})'
str_time = '%H:%M %d/%m/%Y'
for time_column in ('Khởi hành', 'Đến'):
    cleaned = (
        flight[time_column]
        # Drop the " (Giờ địa phương)" (local time) suffix.
        .str.replace(r'( \(Giờ địa phương\))', '', regex=True)
        # Sold-out rows carry "<day> tháng <month>" instead of a full date.
        .str.replace(pattern, _day_month_to_date, regex=True)
        .str.replace(',', '', regex=True)
    )
    flight[time_column] = pd.to_datetime(cleaned, format=str_time)

# Final price = fare * 1.1 + fixed fee.
# NOTE(review): the 1.1 factor is presumably a 10% tax — confirm.
fee = 593000.0
flight[ticket_class] = flight[ticket_class] * 1.1 + fee
# NOTE(review): this column is written as 593.0 while `fee` above is 593000.0;
# kept unchanged so the CSV schema stays the same — confirm the intended unit.
flight['Thuế_phí'] = 593.0

# Output goes to "<ddmmyyyy>/<ddmmyyyy>.csv", named after the scrape date.
path = today.strftime('%d%m%Y')

if not os.path.exists(path):
    # Create the directory
    os.makedirs(path)
    print(f"Directory '{path}' created successfully.")
else:
    print(f"Directory '{path}' already exists.")

flight.to_csv(path + f'/{path}.csv')
Oops, something went wrong.