From e0358197d8a8f3bca0335e548baf4075f7bf74b3 Mon Sep 17 00:00:00 2001 From: queukat <75810528+queukat@users.noreply.github.com> Date: Sat, 5 Oct 2024 17:49:11 +0200 Subject: [PATCH] Update main.py --- main.py | 164 +++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 139 insertions(+), 25 deletions(-) diff --git a/main.py b/main.py index 9eba90d6f..5fb94648f 100644 --- a/main.py +++ b/main.py @@ -20,9 +20,11 @@ # Suppress stderr sys.stderr = open(os.devnull, 'w') + class ConfigError(Exception): pass + class ConfigValidator: @staticmethod def validate_email(email: str) -> bool: @@ -38,78 +40,127 @@ def validate_yaml_file(yaml_path: Path) -> dict: except FileNotFoundError: raise ConfigError(f"File not found: {yaml_path}") - def validate_config(config_yaml_path: Path) -> dict: + """ + Validate and load configuration from a YAML file. + + This function validates the configuration file to ensure that all required keys are present + and that the types of their values are correct. It checks for required sections like 'remote', + 'experience_level', 'jobTypes', 'positions', 'locations', and 'personal_information', among others. + If any key is missing or has an invalid type, it raises a ConfigError. + + Args: + config_yaml_path (Path): Path to the YAML configuration file. + + Returns: + dict: A dictionary containing the validated configuration parameters. + + Raises: + ConfigError: If a required key is missing or has an invalid type. + """ + + logger.info(f"Loading and validating config file: {config_yaml_path}") + + # Load the YAML configuration file parameters = ConfigValidator.validate_yaml_file(config_yaml_path) + logger.debug(f"Loaded config file content: {parameters}") + + # Define required keys and their expected types required_keys = { 'remote': bool, - 'experienceLevel': dict, + 'experience_level': dict, 'jobTypes': dict, 'date': dict, 'positions': list, 'locations': list, 'distance': int, - 'companyBlacklist': list, - 'titleBlacklist': list, + 'company_blacklist': list, + 'title_blacklist': list, 'llm_model_type': str, 'llm_model': str + } + # Validate each required key for key, expected_type in required_keys.items(): if key not in parameters: - if key in ['companyBlacklist', 'titleBlacklist']: + # Handle optional blacklist keys by setting them as empty lists if missing + if key in ['company_blacklist', 'title_blacklist']: parameters[key] = [] + logger.debug(f"Optional key '{key}' missing, set to empty list.") else: + logger.error(f"Missing key '{key}' in config file.") raise ConfigError(f"Missing or invalid key '{key}' in config file {config_yaml_path}") elif not isinstance(parameters[key], expected_type): - if key in ['companyBlacklist', 'titleBlacklist'] and parameters[key] is None: + # Allow None values for blacklists, but otherwise check type validity + if key in ['company_blacklist', 'title_blacklist'] and parameters[key] is None: parameters[key] = [] + logger.debug(f"Key '{key}' was None, set to empty list.") else: - raise ConfigError(f"Invalid type for key '{key}' in config file {config_yaml_path}. Expected {expected_type}.") + logger.error(f"Invalid type for key '{key}' in config file. Expected {expected_type}, but got {type(parameters[key])}.") + raise ConfigError(f"Invalid type for key '{key}' in config file {config_yaml_path}. Expected {expected_type}, but got {type(parameters[key])}.") + # Validate 'experience_level' section experience_levels = ['internship', 'entry', 'associate', 'mid-senior level', 'director', 'executive'] for level in experience_levels: - if not isinstance(parameters['experienceLevel'].get(level), bool): + if not isinstance(parameters['experience_level'].get(level), bool): + logger.error(f"Invalid value for experience level '{level}'. Expected a boolean (True/False). Current value: {parameters['experience_level'].get(level)}") raise ConfigError(f"Experience level '{level}' must be a boolean in config file {config_yaml_path}") + # Validate 'jobTypes' section job_types = ['full-time', 'contract', 'part-time', 'temporary', 'internship', 'other', 'volunteer'] for job_type in job_types: if not isinstance(parameters['jobTypes'].get(job_type), bool): + logger.error(f"Invalid value for job type '{job_type}'. Expected a boolean (True/False). Current value: {parameters['jobTypes'].get(job_type)}") raise ConfigError(f"Job type '{job_type}' must be a boolean in config file {config_yaml_path}") + # Validate 'date' filters section date_filters = ['all time', 'month', 'week', '24 hours'] for date_filter in date_filters: if not isinstance(parameters['date'].get(date_filter), bool): + logger.error(f"Invalid value for date filter '{date_filter}'. Expected a boolean (True/False). Current value: {parameters['date'].get(date_filter)}") raise ConfigError(f"Date filter '{date_filter}' must be a boolean in config file {config_yaml_path}") + # Validate 'positions' list if not all(isinstance(pos, str) for pos in parameters['positions']): + logger.error(f"Invalid value in 'positions'. All entries must be strings. Current values: {parameters['positions']}") raise ConfigError(f"'positions' must be a list of strings in config file {config_yaml_path}") + + # Validate 'locations' list if not all(isinstance(loc, str) for loc in parameters['locations']): + logger.error(f"Invalid value in 'locations'. All entries must be strings. Current values: {parameters['locations']}") raise ConfigError(f"'locations' must be a list of strings in config file {config_yaml_path}") + # Validate 'distance' field approved_distances = {0, 5, 10, 25, 50, 100} if parameters['distance'] not in approved_distances: + logger.error(f"Invalid distance value '{parameters['distance']}'. Must be one of {approved_distances}.") raise ConfigError(f"Invalid distance value in config file {config_yaml_path}. Must be one of: {approved_distances}") - for blacklist in ['companyBlacklist', 'titleBlacklist']: + for blacklist in ['company_blacklist', 'title_blacklist']: if not isinstance(parameters.get(blacklist), list): raise ConfigError(f"'{blacklist}' must be a list in config file {config_yaml_path}") if parameters[blacklist] is None: parameters[blacklist] = [] - return parameters - + logger.info("Configuration validated successfully.") + return parameters @staticmethod def validate_secrets(secrets_yaml_path: Path) -> tuple: secrets = ConfigValidator.validate_yaml_file(secrets_yaml_path) - mandatory_secrets = ['llm_api_key'] + mandatory_secrets = ['email', 'password','llm_api_key'] for secret in mandatory_secrets: if secret not in secrets: raise ConfigError(f"Missing secret '{secret}' in file {secrets_yaml_path}") + if not ConfigValidator.validate_email(secrets['email']): + raise ConfigError(f"Invalid email format in secrets file {secrets_yaml_path}.") + if not secrets['password']: + raise ConfigError(f"Password cannot be empty in secrets file {secrets_yaml_path}.") + return secrets['email'], str(secrets['password']), secrets['llm_api_key'] if not secrets['llm_api_key']: raise ConfigError(f"llm_api_key cannot be empty in secrets file {secrets_yaml_path}.") return secrets['llm_api_key'] @@ -117,7 +168,9 @@ def validate_secrets(secrets_yaml_path: Path) -> tuple: class FileManager: @staticmethod def find_file(name_containing: str, with_extension: str, at_path: Path) -> Path: - return next((file for file in at_path.iterdir() if name_containing.lower() in file.name.lower() and file.suffix.lower() == with_extension.lower()), None) + return next((file for file in at_path.iterdir() if + name_containing.lower() in file.name.lower() and file.suffix.lower() == with_extension.lower()), + None) @staticmethod def validate_data_folder(app_data_folder: Path) -> tuple: @@ -132,7 +185,9 @@ def validate_data_folder(app_data_folder: Path) -> tuple: output_folder = app_data_folder / 'output' output_folder.mkdir(exist_ok=True) - return (app_data_folder / 'secrets.yaml', app_data_folder / 'config.yaml', app_data_folder / 'plain_text_resume.yaml', output_folder) + return ( + app_data_folder / 'secrets.yaml', app_data_folder / 'config.yaml', app_data_folder / 'plain_text_resume.yaml', + output_folder) @staticmethod def file_paths_to_dict(resume_file: Path | None, plain_text_resume_file: Path) -> dict: @@ -148,62 +203,121 @@ def file_paths_to_dict(resume_file: Path | None, plain_text_resume_file: Path) - return result + def init_browser() -> webdriver.Chrome: try: - options = chrome_browser_options() service = ChromeService(ChromeDriverManager().install()) return webdriver.Chrome(service=service, options=options) except Exception as e: raise RuntimeError(f"Failed to initialize browser: {str(e)}") -def create_and_run_bot(parameters, llm_api_key): + +def create_and_run_bot(email, password, parameters, llm_api_key): try: + logger.info("Starting bot initialization...") + logger.debug(f"Email: {email}") + logger.debug(f"Parameters: {parameters}") + logger.debug(f"LLM API Key: {llm_api_key}") + style_manager = StyleManager() + logger.debug("StyleManager initialized successfully.") + resume_generator = ResumeGenerator() + logger.debug("ResumeGenerator initialized successfully.") + + logger.info("Reading plain text resume file...") with open(parameters['uploads']['plainTextResume'], "r", encoding='utf-8') as file: plain_text_resume = file.read() + logger.debug(f"Plain text resume loaded: {plain_text_resume[:100]}...") # Логируем первые 100 символов + resume_object = Resume(plain_text_resume) - resume_generator_manager = FacadeManager(llm_api_key, style_manager, resume_generator, resume_object, Path("data_folder/output")) + logger.debug(f"Resume object created: {resume_object}") + + logger.info("Creating FacadeManager for resume generation...") + resume_generator_manager = FacadeManager( + llm_api_key, style_manager, resume_generator, resume_object, Path("data_folder/output") + ) + logger.debug("FacadeManager initialized successfully.") - # generate resume only if no resume flag was provided - if "resume" not in parameters["uploads"]: - os.system("cls" if os.name == "nt" else "clear") - resume_generator_manager.choose_style() - os.system("cls" if os.name == "nt" else "clear") + os.system('cls' if os.name == 'nt' else 'clear') + logger.info("Choosing resume style...") + resume_generator_manager.choose_style() + logger.info("Resume style chosen successfully.") + os.system('cls' if os.name == 'nt' else 'clear') + logger.info("Creating JobApplicationProfile object...") job_application_profile_object = JobApplicationProfile(plain_text_resume) + logger.debug(f"JobApplicationProfile created: {job_application_profile_object}") + logger.info("Initializing the browser...") browser = init_browser() + logger.debug("Browser initialized successfully.") + + logger.info("Initializing login component...") login_component = AIHawkAuthenticator(browser) + logger.debug(f"Login component created: {login_component}") + + logger.info("Initializing job application component...") apply_component = AIHawkJobManager(browser) + logger.debug(f"Job application component created: {apply_component}") + + logger.info("Initializing GPT Answerer component...") gpt_answerer_component = GPTAnswerer(parameters, llm_api_key) + logger.debug(f"GPT Answerer component created: {gpt_answerer_component}") + + logger.info("Creating AIHawkBotFacade object...") bot = AIHawkBotFacade(login_component, apply_component) + logger.debug(f"Bot facade created: {bot}") + + logger.info("Setting secrets for the bot...") + bot.set_secrets(email, password) + logger.info("Secrets set successfully.") + + logger.info("Setting job application profile and resume...") bot.set_job_application_profile_and_resume(job_application_profile_object, resume_object) + logger.info("Job application profile and resume set successfully.") + + logger.info("Setting GPT Answerer and resume generator...") bot.set_gpt_answerer_and_resume_generator(gpt_answerer_component, resume_generator_manager) + logger.info("GPT Answerer and resume generator set successfully.") + + logger.info("Setting additional parameters for the bot...") bot.set_parameters(parameters) + logger.info("Parameters set successfully.") + + logger.info("Starting bot login process...") bot.start_login() + logger.info("Login process completed successfully.") + + logger.info("Starting job application process...") bot.start_apply() + logger.info("Job application process completed successfully.") + except WebDriverException as e: logger.error(f"WebDriver error occurred: {e}") except Exception as e: + logger.error("An unexpected error occurred in create_and_run_bot:") + logger.exception(e) raise RuntimeError(f"Error running the bot: {str(e)}") + @click.command() -@click.option('--resume', type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path), help="Path to the resume PDF file") +@click.option('--resume', type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path), + help="Path to the resume PDF file") def main(resume: Path = None): try: data_folder = Path("data_folder") secrets_file, config_file, plain_text_resume_file, output_folder = FileManager.validate_data_folder(data_folder) parameters = ConfigValidator.validate_config(config_file) - llm_api_key = ConfigValidator.validate_secrets(secrets_file) + email, password, llm_api_key = ConfigValidator.validate_secrets(secrets_file) parameters['uploads'] = FileManager.file_paths_to_dict(resume, plain_text_resume_file) parameters['outputFileDirectory'] = output_folder - create_and_run_bot(parameters, llm_api_key) + create_and_run_bot(email, password, parameters, llm_api_key) except ConfigError as ce: logger.error(f"Configuration error: {str(ce)}") logger.error(f"Refer to the configuration guide for troubleshooting: https://github.com/feder-cr/AIHawk_AIHawk_automatic_job_application/blob/main/readme.md#configuration {str(ce)}")