Merge pull request #548 from MuilaerteJunior/main
Creating the '--collect' parameter to just collect jobs information
feder-cr authored Oct 16, 2024
2 parents a11381f + 5b55fad commit b4bb280
Showing 4 changed files with 93 additions and 3 deletions.
5 changes: 5 additions & 0 deletions README.md
@@ -557,6 +557,11 @@ Using this folder as a guide can be particularly helpful for:
```bash
python main.py --resume /path/to/your/resume.pdf
```
- **Using the collect mode:**
  If you only want to collect job data, for example to run your own analytics, you can start the bot with the `--collect` option. This stores every job offer found on LinkedIn in the `output/data.json` file (see the consumption sketch below the example).
```bash
python main.py --collect
```
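
  A minimal sketch of how the collected file might be consumed (illustration only, not part of this commit; it assumes `output/data.json` holds a JSON array of job records, and the `company` field name is an assumption rather than a documented schema):
  ```python
  import json
  from collections import Counter

  # Load the records produced by `python main.py --collect`.
  # Assumes output/data.json is a JSON array of job dictionaries.
  with open("output/data.json", encoding="utf-8") as f:
      jobs = json.load(f)

  # Example analysis: count postings per company.
  # The "company" key is an assumed field name, not a documented schema.
  by_company = Counter(job.get("company", "unknown") for job in jobs)
  print(by_company.most_common(10))
  ```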


### Troubleshooting Common Issues
14 changes: 11 additions & 3 deletions main.py
@@ -179,7 +179,12 @@ def create_and_run_bot(parameters, llm_api_key):
        bot.set_gpt_answerer_and_resume_generator(gpt_answerer_component, resume_generator_manager)
        bot.set_parameters(parameters)
        bot.start_login()
        if parameters['collectMode']:
            print('Collecting')
            bot.start_collect_data()
        else:
            print('Applying')
            bot.start_apply()
    except WebDriverException as e:
        logger.error(f"WebDriver error occurred: {e}")
    except Exception as e:
@@ -188,7 +193,8 @@ def create_and_run_bot(parameters, llm_api_key):

@click.command()
@click.option('--resume', type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path), help="Path to the resume PDF file")
def main(resume: Path = None):
@click.option('--collect', is_flag=True, help="Only collect job information into the data.json file")
def main(collect: bool = False, resume: Path = None):
    try:
        data_folder = Path("data_folder")
        secrets_file, config_file, plain_text_resume_file, output_folder = FileManager.validate_data_folder(data_folder)
@@ -198,11 +204,13 @@ def main(resume: Path = None):

        parameters['uploads'] = FileManager.file_paths_to_dict(resume, plain_text_resume_file)
        parameters['outputFileDirectory'] = output_folder
        parameters['collectMode'] = collect

        create_and_run_bot(parameters, llm_api_key)
    except ConfigError as ce:
        logger.error(f"Configuration error: {str(ce)}")
        logger.error(f"Refer to the configuration guide for troubleshooting.")
        logger.error(f"Refer to the configuration guide for troubleshooting: https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk?tab=readme-ov-file#configuration {str(ce)}")

    except FileNotFoundError as fnf:
        logger.error(f"File not found: {str(fnf)}")
        logger.error("Ensure all required files are present in the data folder.")
6 changes: 6 additions & 0 deletions src/aihawk_bot_facade.py
@@ -77,6 +77,12 @@ def start_apply(self):
        self.state.validate_state(['logged_in', 'job_application_profile_set', 'gpt_answerer_set', 'parameters_set'])
        self.apply_component.start_applying()
        logger.debug("Apply process started successfully")

    def start_collect_data(self):
        logger.debug("Starting data collection process")
        self.state.validate_state(['logged_in', 'job_application_profile_set', 'gpt_answerer_set', 'parameters_set'])
        self.apply_component.start_collecting_data()
        logger.debug("Data collection process started successfully")

    def _validate_non_empty(self, value, name):
        logger.debug(f"Validating that {name} is not empty")
71 changes: 71 additions & 0 deletions src/aihawk_job_manager.py
@@ -72,6 +72,51 @@ def set_resume_generator_manager(self, resume_generator_manager):
logger.debug("Setting resume generator manager")
self.resume_generator_manager = resume_generator_manager

    def start_collecting_data(self):
        # Build every (position, location) pair and visit them in random order.
        searches = list(product(self.positions, self.locations))
        random.shuffle(searches)
        page_sleep = 0
        minimum_time = 60 * 5  # spend at least 5 minutes per search page before moving on
        minimum_page_time = time.time() + minimum_time

        for position, location in searches:
            location_url = "&location=" + location
            job_page_number = -1
            utils.printyellow(f"Collecting data for {position} in {location}.")
            try:
                while True:
                    page_sleep += 1
                    job_page_number += 1
                    utils.printyellow(f"Going to job page {job_page_number}")
                    self.next_job_page(position, location_url, job_page_number)
                    time.sleep(random.uniform(1.5, 3.5))
                    utils.printyellow("Starting the collecting process for this page")
                    self.read_jobs()
                    utils.printyellow("Collecting data on this page has been completed!")

                    # Enforce the per-page minimum dwell time before paging on.
                    time_left = minimum_page_time - time.time()
                    if time_left > 0:
                        utils.printyellow(f"Sleeping for {time_left} seconds.")
                        time.sleep(time_left)
                        minimum_page_time = time.time() + minimum_time
                    # Take a short extra break every fifth page.
                    if page_sleep % 5 == 0:
                        sleep_time = random.randint(1, 5)
                        utils.printyellow(f"Sleeping for {sleep_time / 60} minutes.")
                        time.sleep(sleep_time)
                        page_sleep += 1
            except Exception:
                # Any failure (e.g., no more result pages) ends this search; move to the next one.
                pass
            # Apply the same pacing between searches: honor the dwell time and the periodic longer break.
            time_left = minimum_page_time - time.time()
            if time_left > 0:
                utils.printyellow(f"Sleeping for {time_left} seconds.")
                time.sleep(time_left)
                minimum_page_time = time.time() + minimum_time
            if page_sleep % 5 == 0:
                sleep_time = random.randint(50, 90)
                utils.printyellow(f"Sleeping for {sleep_time / 60} minutes.")
                time.sleep(sleep_time)
                page_sleep += 1

    def start_applying(self):
        logger.debug("Starting job application process")
        self.easy_applier_component = AIHawkEasyApplier(self.driver, self.resume_path, self.set_old_answers,
@@ -214,6 +259,32 @@ def get_jobs_from_page(self):
logger.error(f"Error while fetching job elements: {e}")
return []

    def read_jobs(self):
        # Detect the "no results" banner; if present, there is nothing to collect on this page.
        try:
            no_jobs_element = self.driver.find_element(By.CLASS_NAME, 'jobs-search-two-pane__no-results-banner--expand')
            if 'No matching jobs found' in no_jobs_element.text or 'unfortunately, things aren' in self.driver.page_source.lower():
                raise Exception("No more jobs on this page")
        except NoSuchElementException:
            pass

        # Scroll the results list down and back up so LinkedIn lazy-loads every job tile.
        job_results = self.driver.find_element(By.CLASS_NAME, "jobs-search-results-list")
        utils.scroll_slow(self.driver, job_results)
        utils.scroll_slow(self.driver, job_results, step=300, reverse=True)
        job_list_elements = self.driver.find_elements(By.CLASS_NAME, 'scaffold-layout__list-container')[0].find_elements(By.CLASS_NAME, 'jobs-search-results__list-item')
        if not job_list_elements:
            raise Exception("No job class elements found on page")
        job_list = [Job(*self.extract_job_information_from_tile(job_element)) for job_element in job_list_elements]
        for job in job_list:
            if self.is_blacklisted(job.title, job.company, job.link):
                utils.printyellow(f"Blacklisted {job.title} at {job.company}, skipping...")
                self.write_to_file(job, "skipped")
                continue
            try:
                self.write_to_file(job, "data")
            except Exception:
                self.write_to_file(job, "failed")
                continue

    def apply_jobs(self):
        try:
            no_jobs_element = self.driver.find_element(By.CLASS_NAME, 'jobs-search-two-pane__no-results-banner--expand')
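For orientation (not part of the diff): `read_jobs` hands each record to `write_to_file(job, "data")`, which is expected to land in `output/data.json`. A minimal sketch of such a helper, under the assumption that `Job` is a dataclass and the file holds a single JSON array of records, could look like:

```python
import json
from dataclasses import asdict
from pathlib import Path

def write_to_file(job, file_name, output_dir="output"):
    """Append one job record to <output_dir>/<file_name>.json.

    Sketch only: assumes `job` is a dataclass instance and the target
    file holds a single JSON array of records.
    """
    path = Path(output_dir) / f"{file_name}.json"
    path.parent.mkdir(parents=True, exist_ok=True)
    records = json.loads(path.read_text(encoding="utf-8")) if path.exists() else []
    records.append(asdict(job))
    path.write_text(json.dumps(records, indent=2, ensure_ascii=False), encoding="utf-8")
```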
