From dd5764c27f81e9583be068c41c208f5f270ab0a7 Mon Sep 17 00:00:00 2001 From: Re-st Date: Wed, 15 Nov 2023 21:42:52 +0900 Subject: [PATCH] [scrap] Integrate the last commit #8aa00ca into current format. add few councils --- scrap/local_councils/__init__.py | 3 - scrap/local_councils/busan.py | 4 +- scrap/local_councils/daegu.py | 2 +- scrap/local_councils/daejeon/daejeon.py | 117 --- scrap/local_councils/gangwon.py | 5 +- scrap/local_councils/gyeongsang.py | 664 ++++++++++++++++++ .../local_councils/gyeongsangbuk/__init__.py | 10 - scrap/local_councils/gyeongsangbuk/andong.py | 38 - .../local_councils/gyeongsangbuk/cheongdo.py | 38 - scrap/local_councils/gyeongsangbuk/chilgok.py | 45 -- .../local_councils/gyeongsangbuk/gimcheon.py | 38 - .../local_councils/gyeongsangbuk/goryeong.py | 38 - scrap/local_councils/gyeongsangbuk/gumi.py | 38 - .../local_councils/gyeongsangbuk/gyeongju.py | 41 -- .../gyeongsangbuk/moongyeong.py | 39 - scrap/local_councils/gyeongsangbuk/pohang.py | 40 -- scrap/local_councils/gyeongsangbuk/sangju.py | 40 -- scrap/local_councils/gyeongsangbuk/uiseong.py | 40 -- scrap/local_councils/gyeongsangbuk/uljin.py | 40 -- .../local_councils/gyeongsangbuk/yaecheon.py | 40 -- scrap/local_councils/gyeongsangbuk/yungduk.py | 43 -- .../gyeongsangnam/changnyeong.py | 43 -- .../local_councils/gyeongsangnam/changwon.py | 40 -- scrap/local_councils/gyeongsangnam/geoje.py | 45 -- scrap/local_councils/gyeongsangnam/gimhae.py | 43 -- scrap/local_councils/gyeongsangnam/goseong.py | 38 - scrap/local_councils/gyeongsangnam/hamyang.py | 36 - scrap/local_councils/gyeongsangnam/hanam.py | 42 -- scrap/local_councils/gyeongsangnam/hapchun.py | 45 -- scrap/local_councils/gyeongsangnam/jinju.py | 38 - scrap/local_councils/gyeongsangnam/miryang.py | 42 -- scrap/local_councils/gyeongsangnam/namhae.py | 38 - scrap/local_councils/gyeongsangnam/sacheon.py | 38 - .../local_councils/gyeongsangnam/sanchung.py | 45 -- .../local_councils/gyeongsangnam/uiryeong.py | 39 - scrap/local_councils/gyeongsangnam/yangsan.py | 36 - scrap/local_councils/jeolla.py | 157 ++++- scrap/local_councils/junnam/danyang.py | 43 -- scrap/local_councils/junnam/gangjin.py | 43 -- scrap/local_councils/junnam/goheung.py | 43 -- scrap/local_councils/junnam/hamppyeong.py | 48 -- scrap/local_councils/junnam/henam.py | 38 - scrap/local_councils/junnam/muan.py | 42 -- scrap/local_councils/junnam/wando.py | 35 - scrap/local_councils/junnam/yeonggwang.py | 44 -- scrap/utils/scrap_args.json | 26 + scrap/utils/spreadsheet.py | 16 +- 47 files changed, 857 insertions(+), 1649 deletions(-) delete mode 100644 scrap/local_councils/daejeon/daejeon.py create mode 100644 scrap/local_councils/gyeongsang.py delete mode 100644 scrap/local_councils/gyeongsangbuk/__init__.py delete mode 100644 scrap/local_councils/gyeongsangbuk/andong.py delete mode 100644 scrap/local_councils/gyeongsangbuk/cheongdo.py delete mode 100644 scrap/local_councils/gyeongsangbuk/chilgok.py delete mode 100644 scrap/local_councils/gyeongsangbuk/gimcheon.py delete mode 100644 scrap/local_councils/gyeongsangbuk/goryeong.py delete mode 100644 scrap/local_councils/gyeongsangbuk/gumi.py delete mode 100644 scrap/local_councils/gyeongsangbuk/gyeongju.py delete mode 100644 scrap/local_councils/gyeongsangbuk/moongyeong.py delete mode 100644 scrap/local_councils/gyeongsangbuk/pohang.py delete mode 100644 scrap/local_councils/gyeongsangbuk/sangju.py delete mode 100644 scrap/local_councils/gyeongsangbuk/uiseong.py delete mode 100644 scrap/local_councils/gyeongsangbuk/uljin.py delete mode 100644 scrap/local_councils/gyeongsangbuk/yaecheon.py delete mode 100644 scrap/local_councils/gyeongsangbuk/yungduk.py delete mode 100644 scrap/local_councils/gyeongsangnam/changnyeong.py delete mode 100644 scrap/local_councils/gyeongsangnam/changwon.py delete mode 100644 scrap/local_councils/gyeongsangnam/geoje.py delete mode 100644 scrap/local_councils/gyeongsangnam/gimhae.py delete mode 100644 scrap/local_councils/gyeongsangnam/goseong.py delete mode 100644 scrap/local_councils/gyeongsangnam/hamyang.py delete mode 100644 scrap/local_councils/gyeongsangnam/hanam.py delete mode 100644 scrap/local_councils/gyeongsangnam/hapchun.py delete mode 100644 scrap/local_councils/gyeongsangnam/jinju.py delete mode 100644 scrap/local_councils/gyeongsangnam/miryang.py delete mode 100644 scrap/local_councils/gyeongsangnam/namhae.py delete mode 100644 scrap/local_councils/gyeongsangnam/sacheon.py delete mode 100644 scrap/local_councils/gyeongsangnam/sanchung.py delete mode 100644 scrap/local_councils/gyeongsangnam/uiryeong.py delete mode 100644 scrap/local_councils/gyeongsangnam/yangsan.py delete mode 100644 scrap/local_councils/junnam/danyang.py delete mode 100644 scrap/local_councils/junnam/gangjin.py delete mode 100644 scrap/local_councils/junnam/goheung.py delete mode 100644 scrap/local_councils/junnam/hamppyeong.py delete mode 100644 scrap/local_councils/junnam/henam.py delete mode 100644 scrap/local_councils/junnam/muan.py delete mode 100644 scrap/local_councils/junnam/wando.py delete mode 100644 scrap/local_councils/junnam/yeonggwang.py diff --git a/scrap/local_councils/__init__.py b/scrap/local_councils/__init__.py index efe61ff..8228cfd 100644 --- a/scrap/local_councils/__init__.py +++ b/scrap/local_councils/__init__.py @@ -2,9 +2,6 @@ 각 기초의회들의 크롤링 코드를 모아놓은 패키지입니다. 광역자치단체 별로 폴더를 만들어서 관리합니다. """ -from .daejeon.daejeon import * -from .ulsan import * -from .daejeon import * import re from urllib.parse import urlparse from typing import List diff --git a/scrap/local_councils/busan.py b/scrap/local_councils/busan.py index f56a25a..2f2ec53 100644 --- a/scrap/local_councils/busan.py +++ b/scrap/local_councils/busan.py @@ -239,7 +239,7 @@ def scrap_36(url, cid, args: ArgsType = None) -> ScrapResult: party = "정당 정보 없음" party_info = name_tag.find_next("span", string="소속당 : ") if party_info: - party = party_info.parent.get_text(strip=True)[7:].strip() + party = party_info.parent.get_text(strip=True)[6:].strip() councilors.append(Councilor(name=name, jdName=party)) @@ -284,7 +284,7 @@ def scrap_38(url, cid, args: ArgsType = None) -> ScrapResult: party = "정당 정보 없음" party_info = profile.find("span", class_="bold", string="정당 : ") if party_info: - party = party_info.parent.get_text(strip=True)[5:].strip() + party = party_info.parent.get_text(strip=True)[4:].strip() councilors.append(Councilor(name=name, jdName=party)) diff --git a/scrap/local_councils/daegu.py b/scrap/local_councils/daegu.py index dcfc84e..22bf3ad 100644 --- a/scrap/local_councils/daegu.py +++ b/scrap/local_councils/daegu.py @@ -105,7 +105,7 @@ def scrap_46(url, cid, args: ArgsType = None) -> ScrapResult: for profile in soup.find_all("div", class_="profile"): name_tag = profile.find("em", class_="name") name = ( - name_tag.get_text(strip=True).split()[0].strip() if name_tag else "이름 정보 없음" + name_tag.get_text(strip=True).split()[0].replace('의원', '').strip() if name_tag else "이름 정보 없음" ) party = "정당 정보 없음" diff --git a/scrap/local_councils/daejeon/daejeon.py b/scrap/local_councils/daejeon/daejeon.py deleted file mode 100644 index 9b9ad60..0000000 --- a/scrap/local_councils/daejeon/daejeon.py +++ /dev/null @@ -1,117 +0,0 @@ -from scrap.local_councils import * -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup - - -def scrap_65(url, cid) -> ScrapResult: - """대전 동구""" - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - # 프로필 링크 스크랩을 위해 base_url 추출 - parsed_url = urlparse(url) - base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" - - for profile in soup.find_all("dl", class_="profile"): - name_tag = profile.find("strong", class_="name") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - party = "정당 정보 없음" - - # 프로필보기 링크 가져오기 - profile_link = profile.find("a", class_="start") - if profile_link: - data_uid = profile_link.get("data-uid") - if data_uid: - profile_url = base_url + f"/kr/member/profile_popup?uid={data_uid}" - profile_soup = get_soup(profile_url, verify=False) - party_info = profile_soup.find("strong", string="정 당") - if ( - party_info - and (party_span := party_info.find_next("span")) is not None - ): - party = party_span.text - - councilors.append(Councilor(name=name, jdName=party)) - - return ret_local_councilors(cid, councilors) - - -def scrap_66(url, cid) -> ScrapResult: - """대전 중구""" - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - name_tag = profile.find("div", class_="name") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("em", string="소속정당") - if party_info: - party = party_info.find_next("span").get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ret_local_councilors(cid, councilors) - - -def scrap_67( - url, - cid, -) -> ScrapResult: - """대전 서구""" - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("dl"): - name_tag = profile.find("dd", class_="name") - name = ( - name_tag.get_text(strip=True).replace(" 의원", "") if name_tag else "이름 정보 없음" - ) - - party = "정당 정보 없음" - party_info = list(filter(lambda x: "정당" in str(x), profile.find_all("dd"))) - if party_info: - party = party_info[0].get_text(strip=True).replace("정당: ", "") - - councilors.append(Councilor(name=name, jdName=party)) - - return ret_local_councilors(cid, councilors) - - -def scrap_68(url, cid) -> ScrapResult: - """대전 유성구""" - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - name_tag = profile.find("em", class_="name") - # () 안에 있는 한자를 제거 (ex. 김영희(金英姬) -> 김영희) - name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - regex_pattern = re.compile(r"정\s*당\s*:", re.IGNORECASE) # Case-insensitive - party_info = profile.find("em", string=regex_pattern) - if party_info: - party = party_info.find_next("span").get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ret_local_councilors(cid, councilors) - - -def scrap_69(url, cid) -> ScrapResult: - """대전 대덕구""" - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - name_tag = profile.find("em", class_="name") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - regex_pattern = re.compile(r"정\s*당\s*:", re.IGNORECASE) # Case-insensitive - party_info = profile.find("em", string=regex_pattern) - if party_info: - party = party_info.find_next("span").get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ret_local_councilors(cid, councilors) \ No newline at end of file diff --git a/scrap/local_councils/gangwon.py b/scrap/local_councils/gangwon.py index 42bf2f7..eb65449 100644 --- a/scrap/local_councils/gangwon.py +++ b/scrap/local_councils/gangwon.py @@ -1,5 +1,6 @@ import os +from scrap.local_councils import * from scrap.local_councils.basic import * from scrap.utils.requests import get_selenium, By from scrap.utils.utils import getPartyList @@ -171,7 +172,7 @@ def scrap_118( for profile in soup.find_all("div", class_="person_info"): name_td = profile.find("th", string="성함/직위").find_next("td") - name = name_td.get_text(strip=True) if name_td else "이름 정보 없음" + name = name_td.get_text(strip=True).split()[0] if name_td else "이름 정보 없음" party_td = profile.find("th", string="정당").find_next("td") party = party_td.get_text(strip=True) if party_td else "정당 정보 없음" @@ -290,7 +291,7 @@ def scrap_123( for profile in soup.find_all("div", class_="img_text_box"): name_li = profile.select_one("ul.bu li:contains('성명')") name = ( - name_li.get_text(strip=True).replace("성명", "").strip() + name_li.get_text(strip=True).replace("성명", "").strip().split()[0] if name_li else "이름 정보 없음" ) diff --git a/scrap/local_councils/gyeongsang.py b/scrap/local_councils/gyeongsang.py new file mode 100644 index 0000000..36eb1e0 --- /dev/null +++ b/scrap/local_councils/gyeongsang.py @@ -0,0 +1,664 @@ +import requests +from scrap.local_councils import * +from scrap.utils.requests import get_selenium, By +from scrap.local_councils.basic import getprofiles, getname, extract_party, find, findall, regex_pattern + +def scrap_186( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 포항시""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("ul", class_="mlist")[0] + + for profile in mlist.find_all("li"): + name_tag = profile.find("dd", class_="name") + name = name_tag.get_text(strip=True).split(" ")[0] if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("span", string="정") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_188( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 경주시""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="profile"): + data_uid = profile.find("a", class_="btn_profile")["data-uid"] + + if data_uid: + url = f"https://council.gyeongju.go.kr/common/async/member/{data_uid}.do" + result = requests.get(url).json() + name = result["name"] if result["name"] else "이름 정보 없음" + party = result["party_nm"] if result["party_nm"] else "정당 정보 없음" + + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_189( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 김천시""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("ul", class_="memberList")[0] + + for profile in mlist.find_all("li", recursive=False): + name_tag = profile.find("h4") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("span", string=re.compile(r"소속정당\s*:", re.IGNORECASE)) + if party_info: + party = party_info.find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_190( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 안동시""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="profile"): + name_tag = profile.find("em", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="소속정당") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_191( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 구미시""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("ul", class_="mlist")[0] + + for profile in mlist.find_all("li"): + name_tag = profile.find("dd", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("span", string="정") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_194( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 상주시""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="profile"): + name_tag = profile.find("div", class_="name").find("strong") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="소속정당") + if party_info: + party = party_info.find_next("span").find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_195( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 문경시""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="profile"): + data_uid = profile.find("a", class_="btn_profile")["data-uid"] + + if data_uid: + url = f"https://council.gbmg.go.kr/common/async/member/{data_uid}.do" + result = requests.get(url).json() + name = result["name"] if result["name"] else "이름 정보 없음" + party = result["party_nm"] if result["party_nm"] else "정당 정보 없음" + + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_196( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 예천군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="profile"): + data_uid = profile.find("a", class_="btn_profile")["data-uid"] + + if data_uid: + url = f"https://www.ycgcl.kr/common/async/member/{data_uid}.do" + result = requests.get(url).json() + name = result["name"] if result["name"] else "이름 정보 없음" + party = result["party_nm"] if result["party_nm"] else "정당 정보 없음" + + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_198( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 청도군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="profile"): + name_tag = profile.find("em", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="소속정당 : ") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_199( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 고령군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="profile"): + name_tag = profile.find("em", class_="name") + name = name_tag.get_text(strip=True).split("\r")[0] if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="정 당 : ") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_201( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 칠곡군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("ul", class_="memberUl")[0] + + for profile in mlist.find_all("li", recursive=False): + info = profile.find_all("dd") + if info: + name = ( + profile.find("dd", class_="name").get_text(strip=True) + if profile.find("dd", class_="name").get_text(strip=True) + else "이름 정보 없음" + ) + + party = "정당 정보 없음" + party_dd = info[3].get_text(strip=True).replace("정당 : ", "") + if party_dd: + party = party_dd + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_203( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 의성군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="profile"): + data_uid = profile.find("a", class_="btn_profile")["data-uid"] + + if data_uid: + url = f"http://www.cus.go.kr/common/async/member/{data_uid}.do" + result = requests.get(url).json() + name = result["name"] if result["name"] else "이름 정보 없음" + party = result["party_nm"] if result["party_nm"] else "정당 정보 없음" + + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_206( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 영덕군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("div", class_="card_area") + + for profile in mlist: + info = profile.find_all("li") + if info: + name = ( + profile.find("dt").get_text(strip=True).split("(")[0] + if profile.find("dt").get_text(strip=True) + else "이름 정보 없음" + ) + + party = "정당 정보 없음" + party_dd = info[3].get_text(strip=True).replace("정당: ", "") + if party_dd: + party = party_dd + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_208( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상북도 울진군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="profile"): + data_uid = profile.find("a", class_="btn_profile")["data-uid"] + + if data_uid: + url = f"http://council.uljin.go.kr/common/async/member/{data_uid}.do" + result = requests.get(url).json() + name = result["name"] if result["name"] else "이름 정보 없음" + party = result["party_nm"] if result["party_nm"] else "정당 정보 없음" + + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_209( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 창원시""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("ul", class_="mlist")[0] + + for profile in mlist.find_all("li"): + name_tag = profile.find("dd", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("span", string="정") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_210( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 진주시""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="profile"): + name_tag = profile.find("div", class_="name").find("strong") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="소속정당") + if party_info: + party = party_info.find_next("span").find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_212( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 고성군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="profile"): + name_tag = profile.find("em", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="소속정당 : ") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_213( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 사천시""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="profile"): + name_tag = profile.find("em", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="소속정당 : ") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + + +def scrap_214( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 김해시""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("div", class_="card_area") + + for profile in mlist: + info = profile.find_all("li") + if info: + name = ( + profile.find("dt").get_text(strip=True).split("(")[0] + if profile.find("dt").get_text(strip=True) + else "이름 정보 없음" + ) + + party = "정당 정보 없음" + party_dd = info[2].get_text(strip=True).replace("정 당 :", "") + if party_dd: + party = party_dd + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_215( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 밀양시""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="council_box"): + name_tag = ( + profile.find("span", string="이름").find_next("span").get_text(strip=True) + ) + name = name_tag if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = ( + profile.find("span", string="소속정당").find_next("span").get_text(strip=True) + ) + if party_info: + party = party_info + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_216( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 거제시""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("dl") + + for profile in mlist: + info = profile.find_all("li") + if info: + name = ( + profile.find("dt").get_text(strip=True) + if profile.find("dt").get_text(strip=True) + else "이름 정보 없음" + ) + + party = "정당 정보 없음" + party_dd = info[2].get_text(strip=True).replace("정당 :", "") + if party_dd: + party = party_dd + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_217( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 의령군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("li", class_="assemList"): + name_tag = profile.find("p", class_="assemName") + name = name_tag.get_text(strip=True).split(" ")[0] if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("ul", class_="assemCate") + party_info = party_info.find("li") + if party_info: + party = party_info.get_text(strip=True) + + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_218( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 함안군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("div", class_="column") + + for profile in mlist: + name = ( + profile.find("h2").get_text(strip=True).split("\n")[0] + if profile.find("h2").get_text(strip=True) + else "이름 정보 없음" + ) + info = profile.find_all("li") + if info: + party = "정당 정보 없음" + party_dd = info[2].get_text(strip=True).replace("정당", "") + if party_dd: + party = party_dd + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_219( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 창녕군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("div", class_="card_area") + + for profile in mlist: + info = profile.find_all("li") + if info: + name = ( + profile.find("dt").get_text(strip=True).split("(")[0] + if profile.find("dt").get_text(strip=True) + else "이름 정보 없음" + ) + + party = "정당 정보 없음" + party_dd = info[2].get_text(strip=True).replace("정 당 :", "") + if party_dd: + party = party_dd + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_220( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 양산시""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="member"): + name_tag = profile.find("strong", class_="name") + name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("strong", string="정 당 : ") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_222( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 남해군""" + soup = get_soup(url, verify=False, encoding="euc-kr") + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="profile"): + name_tag = profile.find("li", class_="name") + name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find_all("li")[3] + if party_info: + party = party_info.get_text(strip=True).replace("소속정당 : ", "") + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_223( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 함양군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all("div", class_="profile"): + name_tag = profile.find("em", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="소속정당 : ") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_224( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 산청군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find("ul", class_="comment_list") + lis = mlist.find_all("li", recursive=False) + for profile in lis: + print(profile) + info = profile.find_all("li") + name = ( + profile.find("span", class_="name").get_text(strip=True) + if profile.find("span", class_="name").get_text(strip=True) + else "이름 정보 없음" + ) + party = "정당 정보 없음" + + party_dd = info[3].get_text(strip=True).replace("소속정당", "") + if party_dd: + party = party_dd + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_226( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """경상남도 합천군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("dl", class_="member") + + for profile in mlist: + info = profile.find_all("li") + if info: + name = ( + info[0].get_text(strip=True).split("(")[0] + if info[0].get_text(strip=True) + else "이름 정보 없음" + ) + + party = "정당 정보 없음" + party_dd = info[3].get_text(strip=True).replace("소속정당 : ", "") + if party_dd: + party = party_dd.replace(" ", "") + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) diff --git a/scrap/local_councils/gyeongsangbuk/__init__.py b/scrap/local_councils/gyeongsangbuk/__init__.py deleted file mode 100644 index 9204778..0000000 --- a/scrap/local_councils/gyeongsangbuk/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -경상북도 기초의회들의 크롤링 코드를 모아둔 사이트입니다. -""" -from .andong import scrap_andong -from .pohang import scrap_pohang -from .gyeongju import scrap_gyeongju -from .gimcheon import scrap_gimcheon -from .sangju import scrap_sangju -from .moongyeong import scrap_moongyeong -from .yaecheon import scrap_yaecheon diff --git a/scrap/local_councils/gyeongsangbuk/andong.py b/scrap/local_councils/gyeongsangbuk/andong.py deleted file mode 100644 index 88ea1cf..0000000 --- a/scrap/local_councils/gyeongsangbuk/andong.py +++ /dev/null @@ -1,38 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup - -import re - - -def scrap_andong(url="https://council.andong.go.kr/kr/member/name.do") -> ScrapResult: - """대전시 동구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - name_tag = profile.find("em", class_="name") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("em", string="소속정당") - if party_info: - party = party_info.find_next("span").get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="andong", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_andong()) diff --git a/scrap/local_councils/gyeongsangbuk/cheongdo.py b/scrap/local_councils/gyeongsangbuk/cheongdo.py deleted file mode 100644 index f6bcc04..0000000 --- a/scrap/local_councils/gyeongsangbuk/cheongdo.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup - - -def scrap_cheongdo( - url="https://www.cheongdocl.go.kr/kr/member/active.do", -) -> ScrapResult: - """ - Scrap councilors’ details from Yongsan-gu District Council of Seoul page. - - :param url: Yongsan-gu District Council members' list site url - :return: Councilors’ name and party data in ScrapResult object - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - name_tag = profile.find("em", class_="name") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("em", string="소속정당 : ") - if party_info: - party = party_info.find_next("span").get_text(strip=True) - - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="cheongdo", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_cheongdo()) diff --git a/scrap/local_councils/gyeongsangbuk/chilgok.py b/scrap/local_councils/gyeongsangbuk/chilgok.py deleted file mode 100644 index 9d7c11f..0000000 --- a/scrap/local_councils/gyeongsangbuk/chilgok.py +++ /dev/null @@ -1,45 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import requests - - -def scrap_chilgok( - url="https://council.chilgok.go.kr/content/member/member.html", -) -> ScrapResult: - """칠곡군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("ul", class_="memberUl")[0] - - for profile in mlist.find_all("li", recursive=False): - info = profile.find_all("dd") - if info: - name = ( - profile.find("dd", class_="name").get_text(strip=True) - if profile.find("dd", class_="name").get_text(strip=True) - else "이름 정보 없음" - ) - - party = "정당 정보 없음" - party_dd = info[3].get_text(strip=True).replace("정당 : ", "") - if party_dd: - party = party_dd - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="chilgok", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_chilgok()) diff --git a/scrap/local_councils/gyeongsangbuk/gimcheon.py b/scrap/local_councils/gyeongsangbuk/gimcheon.py deleted file mode 100644 index d6b01ff..0000000 --- a/scrap/local_councils/gyeongsangbuk/gimcheon.py +++ /dev/null @@ -1,38 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - - -def scrap_gimcheon(url="https://council.gc.go.kr/kr/member/active.do") -> ScrapResult: - """김천시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("ul", class_="memberList")[0] - - for profile in mlist.find_all("li", recursive=False): - name_tag = profile.find("h4") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("span", string="소속정당 : ") - if party_info: - party = party_info.find_next("span").get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="gimcheon", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_gimcheon()) diff --git a/scrap/local_councils/gyeongsangbuk/goryeong.py b/scrap/local_councils/gyeongsangbuk/goryeong.py deleted file mode 100644 index e25c642..0000000 --- a/scrap/local_councils/gyeongsangbuk/goryeong.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup - - -def scrap_goryeong( - url="https://council.goryeong.go.kr/kr/member/active.do", -) -> ScrapResult: - """ - Scrap councilors’ details from Yongsan-gu District Council of Seoul page. - - :param url: Yongsan-gu District Council members' list site url - :return: Councilors’ name and party data in ScrapResult object - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - name_tag = profile.find("em", class_="name") - name = name_tag.get_text(strip=True).split("\r")[0] if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("em", string="정 당 : ") - if party_info: - party = party_info.find_next("span").get_text(strip=True) - - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="goryeong", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_goryeong()) diff --git a/scrap/local_councils/gyeongsangbuk/gumi.py b/scrap/local_councils/gyeongsangbuk/gumi.py deleted file mode 100644 index e8242ac..0000000 --- a/scrap/local_councils/gyeongsangbuk/gumi.py +++ /dev/null @@ -1,38 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - - -def scrap_gumi( - url="https://gumici.or.kr/content/member/memberName.html", -) -> ScrapResult: - """대전시 동구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("ul", class_="mlist")[0] - - for profile in mlist.find_all("li"): - name_tag = profile.find("dd", class_="name") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("span", string="정") - if party_info: - party = party_info.find_next("span").get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="gumi", council_type=CouncilType.LOCAL_COUNCIL, councilors=councilors - ) - - -if __name__ == "__main__": - print(scrap_gumi()) diff --git a/scrap/local_councils/gyeongsangbuk/gyeongju.py b/scrap/local_councils/gyeongsangbuk/gyeongju.py deleted file mode 100644 index 193bd4d..0000000 --- a/scrap/local_councils/gyeongsangbuk/gyeongju.py +++ /dev/null @@ -1,41 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re -import requests - - -def scrap_gyeongju( - url="https://council.gyeongju.go.kr/kr/member/name.do", -) -> ScrapResult: - """대전시 동구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - data_uid = profile.find("a", class_="btn_profile")["data-uid"] - - if data_uid: - url = f"https://council.gyeongju.go.kr/common/async/member/{data_uid}.do" - result = requests.get(url).json() - name = result["name"] if result["name"] else "이름 정보 없음" - party = result["party_nm"] if result["party_nm"] else "정당 정보 없음" - - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="gyeongju", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_gyeongju()) diff --git a/scrap/local_councils/gyeongsangbuk/moongyeong.py b/scrap/local_councils/gyeongsangbuk/moongyeong.py deleted file mode 100644 index ea3647a..0000000 --- a/scrap/local_councils/gyeongsangbuk/moongyeong.py +++ /dev/null @@ -1,39 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re -import requests - - -def scrap_moongyeong(url="https://council.gbmg.go.kr/kr/member/name.do") -> ScrapResult: - """문경시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - data_uid = profile.find("a", class_="btn_profile")["data-uid"] - - if data_uid: - url = f"https://council.gbmg.go.kr/common/async/member/{data_uid}.do" - result = requests.get(url).json() - name = result["name"] if result["name"] else "이름 정보 없음" - party = result["party_nm"] if result["party_nm"] else "정당 정보 없음" - - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="moongyeong", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_moongyeong()) diff --git a/scrap/local_councils/gyeongsangbuk/pohang.py b/scrap/local_councils/gyeongsangbuk/pohang.py deleted file mode 100644 index 2324771..0000000 --- a/scrap/local_councils/gyeongsangbuk/pohang.py +++ /dev/null @@ -1,40 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - - -def scrap_pohang( - url="https://council.pohang.go.kr/content/member/memberName.html", -) -> ScrapResult: - """대전시 동구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("ul", class_="mlist")[0] - - for profile in mlist.find_all("li"): - name_tag = profile.find("dd", class_="name") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("span", string="정") - if party_info: - party = party_info.find_next("span").get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="pohang", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_pohang()) diff --git a/scrap/local_councils/gyeongsangbuk/sangju.py b/scrap/local_councils/gyeongsangbuk/sangju.py deleted file mode 100644 index 89ca96d..0000000 --- a/scrap/local_councils/gyeongsangbuk/sangju.py +++ /dev/null @@ -1,40 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup - -import re - - -def scrap_sangju( - url="https://www.sangjucouncil.go.kr/kr/member/name.do", -) -> ScrapResult: - """대전시 동구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - name_tag = profile.find("div", class_="name").find("strong") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("em", string="소속정당") - if party_info: - party = party_info.find_next("span").find_next("span").get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="sangju", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_sangju()) diff --git a/scrap/local_councils/gyeongsangbuk/uiseong.py b/scrap/local_councils/gyeongsangbuk/uiseong.py deleted file mode 100644 index c835f59..0000000 --- a/scrap/local_councils/gyeongsangbuk/uiseong.py +++ /dev/null @@ -1,40 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import requests - -import re - - -def scrap_uiseong(url="http://www.cus.go.kr/kr/member/name.do") -> ScrapResult: - """의성군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - data_uid = profile.find("a", class_="btn_profile")["data-uid"] - - if data_uid: - url = f"http://www.cus.go.kr/common/async/member/{data_uid}.do" - result = requests.get(url).json() - name = result["name"] if result["name"] else "이름 정보 없음" - party = result["party_nm"] if result["party_nm"] else "정당 정보 없음" - - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="uiseong", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_uiseong()) diff --git a/scrap/local_councils/gyeongsangbuk/uljin.py b/scrap/local_councils/gyeongsangbuk/uljin.py deleted file mode 100644 index 6304f7d..0000000 --- a/scrap/local_councils/gyeongsangbuk/uljin.py +++ /dev/null @@ -1,40 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import requests - -import re - - -def scrap_uljin(url="https://council.uljin.go.kr/kr/member/name.do") -> ScrapResult: - """울진군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - data_uid = profile.find("a", class_="btn_profile")["data-uid"] - - if data_uid: - url = f"http://council.uljin.go.kr/common/async/member/{data_uid}.do" - result = requests.get(url).json() - name = result["name"] if result["name"] else "이름 정보 없음" - party = result["party_nm"] if result["party_nm"] else "정당 정보 없음" - - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="uiseong", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_uljin()) diff --git a/scrap/local_councils/gyeongsangbuk/yaecheon.py b/scrap/local_councils/gyeongsangbuk/yaecheon.py deleted file mode 100644 index 421b9cc..0000000 --- a/scrap/local_councils/gyeongsangbuk/yaecheon.py +++ /dev/null @@ -1,40 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import requests - -import re - - -def scrap_yaecheon(url="https://www.ycgcl.kr/kr/member/name.do") -> ScrapResult: - """예천시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - data_uid = profile.find("a", class_="btn_profile")["data-uid"] - - if data_uid: - url = f"https://www.ycgcl.kr/common/async/member/{data_uid}.do" - result = requests.get(url).json() - name = result["name"] if result["name"] else "이름 정보 없음" - party = result["party_nm"] if result["party_nm"] else "정당 정보 없음" - - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="yaecheon", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_yaecheon()) diff --git a/scrap/local_councils/gyeongsangbuk/yungduk.py b/scrap/local_councils/gyeongsangbuk/yungduk.py deleted file mode 100644 index 91eba2b..0000000 --- a/scrap/local_councils/gyeongsangbuk/yungduk.py +++ /dev/null @@ -1,43 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import requests - - -def scrap_yungduk(url="https://council.yd.go.kr/kr/member/active") -> ScrapResult: - """영덕시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("div", class_="card_area") - - for profile in mlist: - info = profile.find_all("li") - if info: - name = ( - profile.find("dt").get_text(strip=True).split("(")[0] - if profile.find("dt").get_text(strip=True) - else "이름 정보 없음" - ) - - party = "정당 정보 없음" - party_dd = info[3].get_text(strip=True).replace("정당: ", "") - if party_dd: - party = party_dd - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="yungduk", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_yungduk()) diff --git a/scrap/local_councils/gyeongsangnam/changnyeong.py b/scrap/local_councils/gyeongsangnam/changnyeong.py deleted file mode 100644 index 0df9376..0000000 --- a/scrap/local_councils/gyeongsangnam/changnyeong.py +++ /dev/null @@ -1,43 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import requests - - -def scrap_changnyeong(url="https://www.cngc.go.kr/kr/member/active") -> ScrapResult: - """창녕군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("div", class_="card_area") - - for profile in mlist: - info = profile.find_all("li") - if info: - name = ( - profile.find("dt").get_text(strip=True).split("(")[0] - if profile.find("dt").get_text(strip=True) - else "이름 정보 없음" - ) - - party = "정당 정보 없음" - party_dd = info[2].get_text(strip=True).replace("정 당 :", "") - if party_dd: - party = party_dd - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="geoje", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_changnyeong()) diff --git a/scrap/local_councils/gyeongsangnam/changwon.py b/scrap/local_councils/gyeongsangnam/changwon.py deleted file mode 100644 index 7af6f24..0000000 --- a/scrap/local_councils/gyeongsangnam/changwon.py +++ /dev/null @@ -1,40 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - - -def scrap_changwon( - url="https://gumici.or.kr/content/member/memberName.html", -) -> ScrapResult: - """대전시 동구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("ul", class_="mlist")[0] - - for profile in mlist.find_all("li"): - name_tag = profile.find("dd", class_="name") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("span", string="정") - if party_info: - party = party_info.find_next("span").get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="changwon", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_changwon()) diff --git a/scrap/local_councils/gyeongsangnam/geoje.py b/scrap/local_councils/gyeongsangnam/geoje.py deleted file mode 100644 index c00ede3..0000000 --- a/scrap/local_councils/gyeongsangnam/geoje.py +++ /dev/null @@ -1,45 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import requests - - -def scrap_geoje( - url="https://www.gjcl.go.kr/source/korean/member/active.html", -) -> ScrapResult: - """거제시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("dl") - - for profile in mlist: - info = profile.find_all("li") - if info: - name = ( - profile.find("dt").get_text(strip=True) - if profile.find("dt").get_text(strip=True) - else "이름 정보 없음" - ) - - party = "정당 정보 없음" - party_dd = info[2].get_text(strip=True).replace("정당 :", "") - if party_dd: - party = party_dd - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="geoje", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_geoje()) diff --git a/scrap/local_councils/gyeongsangnam/gimhae.py b/scrap/local_councils/gyeongsangnam/gimhae.py deleted file mode 100644 index ac04429..0000000 --- a/scrap/local_councils/gyeongsangnam/gimhae.py +++ /dev/null @@ -1,43 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import requests - - -def scrap_gimhae(url="https://council.gimhae.go.kr/kr/member/active") -> ScrapResult: - """창녕군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("div", class_="card_area") - - for profile in mlist: - info = profile.find_all("li") - if info: - name = ( - profile.find("dt").get_text(strip=True).split("(")[0] - if profile.find("dt").get_text(strip=True) - else "이름 정보 없음" - ) - - party = "정당 정보 없음" - party_dd = info[2].get_text(strip=True).replace("정 당 :", "") - if party_dd: - party = party_dd - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="gimhae", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_gimhae()) diff --git a/scrap/local_councils/gyeongsangnam/goseong.py b/scrap/local_councils/gyeongsangnam/goseong.py deleted file mode 100644 index c0fcb0b..0000000 --- a/scrap/local_councils/gyeongsangnam/goseong.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup - - -def scrap_goseong( - url="https://council.goseong.go.kr/kr/member/active.do", -) -> ScrapResult: - """ - Scrap councilors’ details from Yongsan-gu District Council of Seoul page. - - :param url: Yongsan-gu District Council members' list site url - :return: Councilors’ name and party data in ScrapResult object - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - name_tag = profile.find("em", class_="name") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("em", string="소속정당 : ") - if party_info: - party = party_info.find_next("span").get_text(strip=True) - - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="goseong", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_goseong()) diff --git a/scrap/local_councils/gyeongsangnam/hamyang.py b/scrap/local_councils/gyeongsangnam/hamyang.py deleted file mode 100644 index fbb430e..0000000 --- a/scrap/local_councils/gyeongsangnam/hamyang.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup - - -def scrap_hamyang(url="https://council.hygn.go.kr/kr/member/active.do") -> ScrapResult: - """ - Scrap councilors’ details from Yongsan-gu District Council of Seoul page. - - :param url: Yongsan-gu District Council members' list site url - :return: Councilors’ name and party data in ScrapResult object - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - name_tag = profile.find("em", class_="name") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("em", string="소속정당 : ") - if party_info: - party = party_info.find_next("span").get_text(strip=True) - - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="hamyang", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_hamyang()) diff --git a/scrap/local_councils/gyeongsangnam/hanam.py b/scrap/local_councils/gyeongsangnam/hanam.py deleted file mode 100644 index c358287..0000000 --- a/scrap/local_councils/gyeongsangnam/hanam.py +++ /dev/null @@ -1,42 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import requests - - -def scrap_hanam(url="https://www.haman.go.kr/04646/04669.web") -> ScrapResult: - """합천군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("div", class_="column") - - for profile in mlist: - name = ( - profile.find("h2").get_text(strip=True).split("\n")[0] - if profile.find("h2").get_text(strip=True) - else "이름 정보 없음" - ) - info = profile.find_all("li") - if info: - party = "정당 정보 없음" - party_dd = info[2].get_text(strip=True).replace("정당", "") - if party_dd: - party = party_dd - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="hanam", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_hanam()) diff --git a/scrap/local_councils/gyeongsangnam/hapchun.py b/scrap/local_councils/gyeongsangnam/hapchun.py deleted file mode 100644 index 15cac33..0000000 --- a/scrap/local_councils/gyeongsangnam/hapchun.py +++ /dev/null @@ -1,45 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import requests - - -def scrap_hapchun( - url="https://www.hccl.go.kr/source/korean/member/active.jsp", -) -> ScrapResult: - """합천군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("dl", class_="member") - - for profile in mlist: - info = profile.find_all("li") - if info: - name = ( - info[0].get_text(strip=True) - if info[0].get_text(strip=True) - else "이름 정보 없음" - ) - - party = "정당 정보 없음" - party_dd = info[3].get_text(strip=True).replace("소속정당 : ", "") - if party_dd: - party = party_dd - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="hapchun", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_hapchun()) diff --git a/scrap/local_councils/gyeongsangnam/jinju.py b/scrap/local_councils/gyeongsangnam/jinju.py deleted file mode 100644 index 138e811..0000000 --- a/scrap/local_councils/gyeongsangnam/jinju.py +++ /dev/null @@ -1,38 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup - -import re - - -def scrap_jinju(url="https://www.jinjucl.com/kr/member/name.do") -> ScrapResult: - """대전시 동구 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - name_tag = profile.find("div", class_="name").find("strong") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("em", string="소속정당") - if party_info: - party = party_info.find_next("span").find_next("span").get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="jinju", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_jinju()) diff --git a/scrap/local_councils/gyeongsangnam/miryang.py b/scrap/local_councils/gyeongsangnam/miryang.py deleted file mode 100644 index 2300cbb..0000000 --- a/scrap/local_councils/gyeongsangnam/miryang.py +++ /dev/null @@ -1,42 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup - - -def scrap_miryang( - url="https://council.miryang.go.kr/web/EgovCouncilManList.do?menuNo=14010100", -) -> ScrapResult: - """밀양시 의회 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="council_box"): - name_tag = ( - profile.find("span", string="이름").find_next("span").get_text(strip=True) - ) - name = name_tag if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = ( - profile.find("span", string="소속정당").find_next("span").get_text(strip=True) - ) - if party_info: - party = party_info - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="miryang", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_miryang()) diff --git a/scrap/local_councils/gyeongsangnam/namhae.py b/scrap/local_councils/gyeongsangnam/namhae.py deleted file mode 100644 index cfef590..0000000 --- a/scrap/local_councils/gyeongsangnam/namhae.py +++ /dev/null @@ -1,38 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - - -def scrap_namhae( - url="https://council.namhae.go.kr/source/korean/member/active.html", -) -> ScrapResult: - """남해 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False, encoding="euc-kr") - councilors: List[Councilor] = [] - for profile in soup.find_all("div", class_="profile"): - name_tag = profile.find("li", class_="name") - name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find_all("li")[3] - if party_info: - party = party_info.get_text(strip=True).replace("소속정당 : ", "") - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="yangsan", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_namhae()) diff --git a/scrap/local_councils/gyeongsangnam/sacheon.py b/scrap/local_councils/gyeongsangnam/sacheon.py deleted file mode 100644 index b42632d..0000000 --- a/scrap/local_councils/gyeongsangnam/sacheon.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup - - -def scrap_sacheon( - url="https://council.sacheon.go.kr/kr/member/active.do", -) -> ScrapResult: - """ - Scrap councilors’ details from Yongsan-gu District Council of Seoul page. - - :param url: Yongsan-gu District Council members' list site url - :return: Councilors’ name and party data in ScrapResult object - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("div", class_="profile"): - name_tag = profile.find("em", class_="name") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("em", string="소속정당 : ") - if party_info: - party = party_info.find_next("span").get_text(strip=True) - - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="sacheon", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_sacheon()) diff --git a/scrap/local_councils/gyeongsangnam/sanchung.py b/scrap/local_councils/gyeongsangnam/sanchung.py deleted file mode 100644 index 7c65f6b..0000000 --- a/scrap/local_councils/gyeongsangnam/sanchung.py +++ /dev/null @@ -1,45 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import requests - - -def scrap_sanchung( - url="https://www.sancheong.go.kr/council/selectPersonalAssembly.do?key=2224&assemCate=8", -) -> ScrapResult: - """산청군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find("ul", class_="comment_list") - lis = mlist.find_all("li", recursive=False) - for profile in lis: - print(profile) - info = profile.find_all("li") - name = ( - profile.find("span", class_="name").get_text(strip=True) - if profile.find("span", class_="name").get_text(strip=True) - else "이름 정보 없음" - ) - party = "정당 정보 없음" - - party_dd = info[3].get_text(strip=True).replace("소속정당", "") - if party_dd: - party = party_dd - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="hapchun", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_sanchung()) diff --git a/scrap/local_councils/gyeongsangnam/uiryeong.py b/scrap/local_councils/gyeongsangnam/uiryeong.py deleted file mode 100644 index 19b21fe..0000000 --- a/scrap/local_councils/gyeongsangnam/uiryeong.py +++ /dev/null @@ -1,39 +0,0 @@ -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup - - -def scrap_uiryeong( - url="https://www.uiryeong.go.kr/board/list.uiryeong?boardId=BBS_0000169&menuCd=DOM_000000502001000000&contentsSid=1040", -) -> ScrapResult: - """ - Scrap councilors’ details from Yongsan-gu District Council of Seoul page. - - :param url: Yongsan-gu District Council members' list site url - :return: Councilors’ name and party data in ScrapResult object - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - - for profile in soup.find_all("li", class_="assemList"): - name_tag = profile.find("p", class_="assemName") - name = name_tag.get_text(strip=True).split(" ")[0] if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("ul", class_="assemCate") - party_info = party_info.find("li") - if party_info: - party = party_info.get_text(strip=True) - - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="goseong", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_uiryeong()) diff --git a/scrap/local_councils/gyeongsangnam/yangsan.py b/scrap/local_councils/gyeongsangnam/yangsan.py deleted file mode 100644 index ef26d33..0000000 --- a/scrap/local_councils/gyeongsangnam/yangsan.py +++ /dev/null @@ -1,36 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - - -def scrap_yangsan(url="https://www.yscouncil.go.kr/kr/member/active") -> ScrapResult: - """양산시 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - for profile in soup.find_all("div", class_="member"): - name_tag = profile.find("strong", class_="name") - name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("strong", string="정 당 : ") - if party_info: - party = party_info.find_next("span").get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="yangsan", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_yangsan()) diff --git a/scrap/local_councils/jeolla.py b/scrap/local_councils/jeolla.py index 6079730..43e240d 100644 --- a/scrap/local_councils/jeolla.py +++ b/scrap/local_councils/jeolla.py @@ -1,3 +1,4 @@ +import requests from scrap.local_councils import * from scrap.utils.requests import get_selenium, By from scrap.local_councils.basic import getprofiles, getname, extract_party, find, findall, regex_pattern @@ -56,7 +57,7 @@ def scrap_156( for profile in findall(memberlist, "li"): name_tag = profile.find("div", class_="name") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + name = name_tag.get_text(strip=True).split()[0] if name_tag else "이름 정보 없음" party = "정당 정보 없음" # TODO @@ -87,7 +88,7 @@ def scrap_157( return ret_local_councilors(cid, councilors) -def scrap_160(url, cid) -> ScrapResult: +def scrap_160(url, cid, args: ArgsType = None) -> ScrapResult: """전라북도 임실군""" browser = get_selenium(url) councilors: list[Councilor] = [] @@ -102,7 +103,7 @@ def scrap_160(url, cid) -> ScrapResult: return ret_local_councilors(cid, councilors) -def scrap_161(url, cid) -> ScrapResult: +def scrap_161(url, cid, args: ArgsType = None) -> ScrapResult: """전라북도 순창군""" browser = get_selenium(url) councilors: list[Councilor] = [] @@ -130,7 +131,7 @@ def scrap_162( for profile in findall(soup, "div", class_="con_mem"): name_tag = profile.find("strong") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + name = name_tag.get_text(strip=True).split()[0] if name_tag else "이름 정보 없음" party = "정당 정보 없음" # TODO @@ -324,5 +325,149 @@ def scrap_167( # return ret_local_councilors(cid, councilors) -if __name__ == "__main__": - print(scrap_161("https://www.sunchangcouncil.go.kr/main/contents/lawmaker", 161)) +def scrap_177( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """전라남도 강진군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("ul", class_="memlist")[0] + + for profile in mlist.find_all("li", recursive=False): + info = profile.find("ul", class_="info") + name = ( + info.find("h5").get_text(strip=True) + if info.find("h5").get_text(strip=True) + else "이름 정보 없음" + ) + + li = info.find_all("li", recursive=False)[6] + party = "정당 정보 없음" + party_dd = li.find("dd") + if party_dd: + party = party_dd.get_text(strip=True) + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_178( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """전라남도 완도군""" + councilors: List[Councilor] = [] + + result = requests.get(url) + result_json = result.json() + for profile in result_json["list"]: + name = profile["cmNm"] + party = profile["mpParty"] + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_179( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("ul", class_="memberList")[0] + + for profile in mlist.find_all("li", recursive=False): + name_tag = profile.find("h4") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("span", string="소속정당 :") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_182( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """전라남도 강진군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("ul", class_="formerCouncillor")[0] + + for profile in mlist.find_all("li", recursive=False): + info = profile.find("div", class_="profileInfo") + name = ( + info.find("div", class_="infosubmem_name").get_text(strip=True) + if info.find("div", class_="infosubmem_name").get_text(strip=True) + else "이름 정보 없음" + ) + + party_dd = info.find("div", class_="infoContents") + party = "정당 정보 없음" + if party_dd: + party = party_dd.get_text(strip=True) + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_183( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """전라남도 영광군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("div", class_="councilors_curr2_wrap")[0] + + for profile in mlist.find_all("div", class_="subcon_body_txt", recursive=False): + info = profile.find("div", class_="ygmember_txt") + name = ( + info.find("h4").get_text(strip=True).split(" ")[0] + if info.find("h4").get_text(strip=True) + else "이름 정보 없음" + ) + + party_dd = info.find("p", class_="party_highlight") + party = "정당 정보 없음" + if party_dd: + party = party_dd.get_text(strip=True).replace("정당 : ", "") + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) + +def scrap_184( + url, + cid, + args: ArgsType = None, +) -> ScrapResult: + """전라남도 함평군""" + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all("div", id="subContent")[0] + + total_div = mlist.find_all("div", class_="infosubcontent") + total_div.append(mlist.find_all("div", class_="infosubcontent2")) + for profile in total_div: + if not profile: + continue + info = profile.find("div", class_="infosub_detail") + name = ( + info.find("li", class_="infosubmem_name").get_text(strip=False)[:3] + if info.find("li", class_="infosubmem_name").get_text(strip=True) + else "이름 정보 없음" + ) + + party_dd = info.find("ul", class_="infosub").find_all("li")[1] + party = "정당 정보 없음" + if party_dd: + party = party_dd.get_text(strip=True).replace("소속정당 : ", "") + councilors.append(Councilor(name=name, jdName=party)) + + return ret_local_councilors(cid, councilors) diff --git a/scrap/local_councils/junnam/danyang.py b/scrap/local_councils/junnam/danyang.py deleted file mode 100644 index aaa4f50..0000000 --- a/scrap/local_councils/junnam/danyang.py +++ /dev/null @@ -1,43 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - - -def scrap_damyang(url="https://council.gc.go.kr/kr/member/active.do") -> ScrapResult: - """담양군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("ul", class_="memlist")[0] - - for profile in mlist.find_all("li", recursive=False): - info = profile.find("ul", class_="info") - name = ( - info.find("h5").get_text(strip=True) - if info.find("h5").get_text(strip=True) - else "이름 정보 없음" - ) - - li = info.find("li", class_="item MP") - party = "정당 정보 없음" - party_dd = li.find_all("dd")[1] - if party_dd: - party = party_dd.get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="damyang", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_damyang()) diff --git a/scrap/local_councils/junnam/gangjin.py b/scrap/local_councils/junnam/gangjin.py deleted file mode 100644 index 2964505..0000000 --- a/scrap/local_councils/junnam/gangjin.py +++ /dev/null @@ -1,43 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - - -def scrap_gangjin(url="https://www.gangjincl.go.kr/index.do?PID=010") -> ScrapResult: - """강진군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("ul", class_="memlist")[0] - - for profile in mlist.find_all("li", recursive=False): - info = profile.find("ul", class_="info") - name = ( - info.find("h5").get_text(strip=True) - if info.find("h5").get_text(strip=True) - else "이름 정보 없음" - ) - - li = info.find_all("li", recursive=False)[6] - party = "정당 정보 없음" - party_dd = li.find("dd") - if party_dd: - party = party_dd.get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="damyang", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_gangjin()) diff --git a/scrap/local_councils/junnam/goheung.py b/scrap/local_councils/junnam/goheung.py deleted file mode 100644 index 6aff1bc..0000000 --- a/scrap/local_councils/junnam/goheung.py +++ /dev/null @@ -1,43 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - - -def scrap_goheung(url="https://council.gc.go.kr/kr/member/active.do") -> ScrapResult: - """고흥군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("ul", class_="memlist")[0] - - for profile in mlist.find_all("li", recursive=False): - info = profile.find("ul", class_="info") - name = ( - info.find("h5").get_text(strip=True) - if info.find("h5").get_text(strip=True) - else "이름 정보 없음" - ) - - li = info.find("li", class_="item MP") - party = "정당 정보 없음" - party_dd = li.find_all("dd")[1] - if party_dd: - party = party_dd.get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="damyang", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_damyang()) diff --git a/scrap/local_councils/junnam/hamppyeong.py b/scrap/local_councils/junnam/hamppyeong.py deleted file mode 100644 index c6d4375..0000000 --- a/scrap/local_councils/junnam/hamppyeong.py +++ /dev/null @@ -1,48 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - - -def scrap_hamppyeong( - url="https://www.hpcouncil.go.kr/main/incumbentCouncillor.do?PID=0201&item=01", -) -> ScrapResult: - """무안 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("div", id="subContent")[0] - - total_div = mlist.find_all("div", class_="infosubcontent") - total_div.append(mlist.find_all("div", class_="infosubcontent2")) - for profile in total_div: - if not profile: - continue - info = profile.find("div", class_="infosub_detail") - name = ( - info.find("li", class_="infosubmem_name").get_text(strip=False)[:3] - if info.find("li", class_="infosubmem_name").get_text(strip=True) - else "이름 정보 없음" - ) - - party_dd = info.find("ul", class_="infosub").find_all("li")[1] - party = "정당 정보 없음" - if party_dd: - party = party_dd.get_text(strip=True).replace("소속정당 : ", "") - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="yeonggwang", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_hamppyeong()) diff --git a/scrap/local_councils/junnam/henam.py b/scrap/local_councils/junnam/henam.py deleted file mode 100644 index 419d9ce..0000000 --- a/scrap/local_councils/junnam/henam.py +++ /dev/null @@ -1,38 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - - -def scrap_henam(url="http://council.haenam.go.kr/kr/member/active.do") -> ScrapResult: - """해남 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("ul", class_="memberList")[0] - - for profile in mlist.find_all("li", recursive=False): - name_tag = profile.find("h4") - name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" - - party = "정당 정보 없음" - party_info = profile.find("span", string="소속정당 : ") - if party_info: - party = party_info.find_next("span").get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="gimcheon", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_henam()) diff --git a/scrap/local_councils/junnam/muan.py b/scrap/local_councils/junnam/muan.py deleted file mode 100644 index 51070d8..0000000 --- a/scrap/local_councils/junnam/muan.py +++ /dev/null @@ -1,42 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - - -def scrap_muan( - url="http://www.muan.or.kr/main/incumbentCouncillor.do?PID=0201", -) -> ScrapResult: - """무안 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("ul", class_="formerCouncillor")[0] - - for profile in mlist.find_all("li", recursive=False): - info = profile.find("div", class_="profileInfo") - name = ( - info.find("div", class_="infosubmem_name").get_text(strip=True) - if info.find("div", class_="infosubmem_name").get_text(strip=True) - else "이름 정보 없음" - ) - - party_dd = info.find("div", class_="infoContents") - party = "정당 정보 없음" - if party_dd: - party = party_dd.get_text(strip=True) - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="muan", council_type=CouncilType.LOCAL_COUNCIL, councilors=councilors - ) - - -if __name__ == "__main__": - print(scrap_muan()) diff --git a/scrap/local_councils/junnam/wando.py b/scrap/local_councils/junnam/wando.py deleted file mode 100644 index 7b69659..0000000 --- a/scrap/local_councils/junnam/wando.py +++ /dev/null @@ -1,35 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import requests - - -def scrap_wando( - url="http://www.wdcc.or.kr:8088/common/selectCouncilMemberList.json?searchCsDaesoo=9", -) -> ScrapResult: - """완도군 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - councilors: List[Councilor] = [] - - result = requests.get(url) - result_json = result.json() - for profile in result_json["list"]: - name = profile["cmNm"] - party = profile["mpParty"] - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="wando", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_wando()) diff --git a/scrap/local_councils/junnam/yeonggwang.py b/scrap/local_councils/junnam/yeonggwang.py deleted file mode 100644 index f7d1c87..0000000 --- a/scrap/local_councils/junnam/yeonggwang.py +++ /dev/null @@ -1,44 +0,0 @@ -from urllib.parse import urlparse - -from typing import List -from scrap.utils.types import CouncilType, Councilor, ScrapResult -from scrap.utils.requests import get_soup -import re - - -def scrap_yeonggwang( - url="https://www.ygcouncil.go.kr/bbs/content.php?co_id=councilors_curr#aside", -) -> ScrapResult: - """무안 페이지에서 의원 상세약력 스크랩 - - :param url: 의원 목록 사이트 url - :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - """ - - soup = get_soup(url, verify=False) - councilors: List[Councilor] = [] - mlist = soup.find_all("div", class_="councilors_curr2_wrap")[0] - - for profile in mlist.find_all("div", class_="subcon_body_txt", recursive=False): - info = profile.find("div", class_="ygmember_txt") - name = ( - info.find("h4").get_text(strip=True).split(" ")[0] - if info.find("h4").get_text(strip=True) - else "이름 정보 없음" - ) - - party_dd = info.find("p", class_="party_highlight") - party = "정당 정보 없음" - if party_dd: - party = party_dd.get_text(strip=True).replace("정당 : ", "") - councilors.append(Councilor(name=name, jdName=party)) - - return ScrapResult( - council_id="yeonggwang", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors, - ) - - -if __name__ == "__main__": - print(scrap_yeonggwang()) diff --git a/scrap/utils/scrap_args.json b/scrap/utils/scrap_args.json index 82b4ca6..e854932 100644 --- a/scrap/utils/scrap_args.json +++ b/scrap/utils/scrap_args.json @@ -608,6 +608,15 @@ "pty_elt": "span", "pty_cls": "itemContent" }, + "169": { + "pf_elt": "li", + "pf_cls": "item_box", + "pf_memlistelt": "div", + "pf_memlistcls": "submem", + "name_elt": "h5", + "pty_elt": "li", + "pty_cls": "item MP" + }, "171": { "pf_elt": "tr", "pf_memlistelt": "tbody", @@ -626,5 +635,22 @@ "name_elt": "li", "name_cls": "name", "pty_elt": "dl" + }, + "176": { + "pf_elt": "div", + "pf_cls": "profile", + "name_elt": "strong", + "name_cls": "name", + "pty_elt": "ul", + "pty_cls": "dot" + }, + "177": { + "pf_elt": "li", + "pf_cls": "item_box", + "pf_memlistelt": "ul", + "pf_memlistcls": "memlist", + "name_elt": "h5", + "name_cls": "dd", + "pty_elt": "dl" } } \ No newline at end of file diff --git a/scrap/utils/spreadsheet.py b/scrap/utils/spreadsheet.py index bee6477..19968bb 100644 --- a/scrap/utils/spreadsheet.py +++ b/scrap/utils/spreadsheet.py @@ -17,6 +17,7 @@ from scrap.local_councils.gangwon import * from scrap.local_councils.chungcheong import * from scrap.local_councils.jeolla import * +from scrap.local_councils.gyeongsang import * from scrap.local_councils import * from requests.exceptions import Timeout @@ -66,16 +67,19 @@ def main() -> None: 0 ) # 원하는 워크시트 선택 (0은 첫 번째 워크시트입니다.) # TODO - 홈페이지 위 charset=euc-kr 등을 인식해 바로 가져오기. - euc_kr = [6, 13, 16, 31, 72, 88, 112, 134, 154, 157, 163, 165, 167, 181, 197, 202] + euc_kr = [6, 13, 16, 31, 72, 88, 112, 134, 154, 157, 163, 165, 167, 176, 181, 197, 202, 222] special_functions = ( list(range(1, 57)) + [62, 63, 64, 88, 97, 103, 107] + list(range(113, 127)) + [132, 134, 140, 142, 154, 155, 156, 157, 160, 161, 162, 163, 164, 165, - 167, 170, 171, 172] + 167] + + list(range(177, 180)) + + [182, 183, 184, 186, 188, 189, 190, 191, 194, 195, 196, 198, 199, 201, 203, 206, 208, 209, 210] + + list(range(212, 221)) + [222, 223, 224, 226] ) - selenium_basic = [76, 78, 101, 173] - no_information = [106, 111, 172] + selenium_basic = [76, 78, 101, 169, 173, 177] + no_information = [18, 29, 106, 111, 172, 181, 185, 187, 197, 200, 204, 207] error_unsolved = [170, 171] errors = [] f = open(JSON_PATH, "r") @@ -89,7 +93,7 @@ def main() -> None: parse_error_times = 0 timeouts = 0 N = 226 - for n in range(1, 56): + for n in [189]:# range(1, N + 1): if n in no_information + error_unsolved: error_msg = "지난번 확인 시, 정당 정보 등이 홈페이지에 없었습니다. \ 다시 확인해보시겠어요?" \ @@ -113,7 +117,7 @@ def main() -> None: if n in special_functions: function_name = f"scrap_{n}" if hasattr(sys.modules[__name__], function_name): - function_to_call = getattr(sys.modules[__name__], function_name) + function_to_call = getattr(sys.modules[__name__], function_name) # type: ignore result = str(function_to_call(council_url, n, args=council_args).councilors) else: print("[API/spreadsheet] Error : No function found")