From a33d2682eba2fe0f61661ac199b730e2ae8e7c29 Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Tue, 26 Mar 2019 18:21:11 -0400 Subject: [PATCH] fix args --- neuroner/__main__.py | 90 ++++++++++++++++---------------- neuroner/neuromodel.py | 113 ++++++++++------------------------------- setup.py | 2 +- 3 files changed, 72 insertions(+), 133 deletions(-) diff --git a/neuroner/__main__.py b/neuroner/__main__.py index e41be202..9f2c6d5b 100644 --- a/neuroner/__main__.py +++ b/neuroner/__main__.py @@ -21,51 +21,51 @@ def parse_arguments(arguments=None): arguments: arguments the arguments, optionally given as argument ''' - default_param = neuromodel._get_default_param() + # default_param = neuromodel._get_default_param() parser = argparse.ArgumentParser(description='''NeuroNER CLI''', formatter_class=RawTextHelpFormatter) - parser.add_argument('--parameters_filepath', required=False, default=os.path.join('.','parameters.ini'), help='The parameters file') - parser.add_argument('--character_embedding_dimension', required=False, default=default_param['character_embedding_dimension'], help='') - parser.add_argument('--character_lstm_hidden_state_dimension', required=False, default=default_param['character_lstm_hidden_state_dimension'], help='') - parser.add_argument('--check_for_digits_replaced_with_zeros', required=False, default=default_param['check_for_digits_replaced_with_zeros'], help='') - parser.add_argument('--check_for_lowercase', required=False, default=default_param['check_for_lowercase'], help='') - parser.add_argument('--dataset_text_folder', required=False, default=default_param['dataset_text_folder'], help='') - parser.add_argument('--debug', required=False, default=default_param['debug'], help='') - parser.add_argument('--dropout_rate', required=False, default=default_param['dropout_rate'], help='') - parser.add_argument('--experiment_name', required=False, default=default_param['experiment_name'], help='') - parser.add_argument('--freeze_token_embeddings', required=False, default=default_param['freeze_token_embeddings'], help='') - parser.add_argument('--gradient_clipping_value', required=False, default=default_param['gradient_clipping_value'], help='') - parser.add_argument('--learning_rate', required=False, default=default_param['learning_rate'], help='') - parser.add_argument('--load_only_pretrained_token_embeddings', required=False, default=default_param['load_only_pretrained_token_embeddings'], help='') - parser.add_argument('--load_all_pretrained_token_embeddings', required=False, default=default_param['load_all_pretrained_token_embeddings'], help='') - parser.add_argument('--main_evaluation_mode', required=False, default=default_param['main_evaluation_mode'], help='') - parser.add_argument('--maximum_number_of_epochs', required=False, default=default_param['maximum_number_of_epochs'], help='') - parser.add_argument('--number_of_cpu_threads', required=False, default=default_param['number_of_cpu_threads'], help='') - parser.add_argument('--number_of_gpus', required=False, default=default_param['number_of_gpus'], help='') - parser.add_argument('--optimizer', required=False, default=default_param['optimizer'], help='') - parser.add_argument('--output_folder', required=False, default=default_param['output_folder'], help='') - parser.add_argument('--patience', required=False, default=default_param['patience'], help='') - parser.add_argument('--plot_format', required=False, default=default_param['plot_format'], help='') - parser.add_argument('--pretrained_model_folder', required=False, default=default_param['pretrained_model_folder'], help='') - parser.add_argument('--reload_character_embeddings', required=False, default=default_param['reload_character_embeddings'], help='') - parser.add_argument('--reload_character_lstm', required=False, default=default_param['reload_character_lstm'], help='') - parser.add_argument('--reload_crf', required=False, default=default_param['reload_crf'], help='') - parser.add_argument('--reload_feedforward', required=False, default=default_param['reload_feedforward'], help='') - parser.add_argument('--reload_token_embeddings', required=False, default=default_param['reload_token_embeddings'], help='') - parser.add_argument('--reload_token_lstm', required=False, default=default_param['reload_token_lstm'], help='') - parser.add_argument('--remap_unknown_tokens_to_unk', required=False, default=default_param['remap_unknown_tokens_to_unk'], help='') - parser.add_argument('--spacylanguage', required=False, default=default_param['spacylanguage'], help='') - parser.add_argument('--tagging_format', required=False, default=default_param['tagging_format'], help='') - parser.add_argument('--token_embedding_dimension', required=False, default=default_param['token_embedding_dimension'], help='') - parser.add_argument('--token_lstm_hidden_state_dimension', required=False, default=default_param['token_lstm_hidden_state_dimension'], help='') - parser.add_argument('--token_pretrained_embedding_filepath', required=False, default=default_param['token_pretrained_embedding_filepath'], help='') - parser.add_argument('--tokenizer', required=False, default=default_param['tokenizer'], help='') - parser.add_argument('--train_model', required=False, default=default_param['train_model'], help='') - parser.add_argument('--use_character_lstm', required=False, default=default_param['use_character_lstm'], help='') - parser.add_argument('--use_crf', required=False, default=default_param['use_crf'], help='') - parser.add_argument('--use_pretrained_model', required=False, default=default_param['use_pretrained_model'], help='') - parser.add_argument('--verbose', required=False, default=default_param['verbose'], help='') + parser.add_argument('--parameters_filepath', required=False, default=None, help='The parameters file') + parser.add_argument('--character_embedding_dimension', required=False, default=None, help='') + parser.add_argument('--character_lstm_hidden_state_dimension', required=False, default=None, help='') + parser.add_argument('--check_for_digits_replaced_with_zeros', required=False, default=None, help='') + parser.add_argument('--check_for_lowercase', required=False, default=None, help='') + parser.add_argument('--dataset_text_folder', required=False, default=None, help='') + parser.add_argument('--debug', required=False, default=None, help='') + parser.add_argument('--dropout_rate', required=False, default=None, help='') + parser.add_argument('--experiment_name', required=False, default=None, help='') + parser.add_argument('--freeze_token_embeddings', required=False, default=None, help='') + parser.add_argument('--gradient_clipping_value', required=False, default=None, help='') + parser.add_argument('--learning_rate', required=False, default=None, help='') + parser.add_argument('--load_only_pretrained_token_embeddings', required=False, default=None, help='') + parser.add_argument('--load_all_pretrained_token_embeddings', required=False, default=None, help='') + parser.add_argument('--main_evaluation_mode', required=False, default=None, help='') + parser.add_argument('--maximum_number_of_epochs', required=False, default=None, help='') + parser.add_argument('--number_of_cpu_threads', required=False, default=None, help='') + parser.add_argument('--number_of_gpus', required=False, default=None, help='') + parser.add_argument('--optimizer', required=False, default=None, help='') + parser.add_argument('--output_folder', required=False, default=None, help='') + parser.add_argument('--patience', required=False, default=None, help='') + parser.add_argument('--plot_format', required=False, default=None, help='') + parser.add_argument('--pretrained_model_folder', required=False, default=None, help='') + parser.add_argument('--reload_character_embeddings', required=False, default=None, help='') + parser.add_argument('--reload_character_lstm', required=False, default=None, help='') + parser.add_argument('--reload_crf', required=False, default=None, help='') + parser.add_argument('--reload_feedforward', required=False, default=None, help='') + parser.add_argument('--reload_token_embeddings', required=False, default=None, help='') + parser.add_argument('--reload_token_lstm', required=False, default=None, help='') + parser.add_argument('--remap_unknown_tokens_to_unk', required=False, default=None, help='') + parser.add_argument('--spacylanguage', required=False, default=None, help='') + parser.add_argument('--tagging_format', required=False, default=None, help='') + parser.add_argument('--token_embedding_dimension', required=False, default=None, help='') + parser.add_argument('--token_lstm_hidden_state_dimension', required=False, default=None, help='') + parser.add_argument('--token_pretrained_embedding_filepath', required=False, default=None, help='') + parser.add_argument('--tokenizer', required=False, default=None, help='') + parser.add_argument('--train_model', required=False, default=None, help='') + parser.add_argument('--use_character_lstm', required=False, default=None, help='') + parser.add_argument('--use_crf', required=False, default=None, help='') + parser.add_argument('--use_pretrained_model', required=False, default=None, help='') + parser.add_argument('--verbose', required=False, default=None, help='') # load data to local folder parser.add_argument('--fetch_data', required=False, default='', help='') @@ -78,9 +78,9 @@ def parse_arguments(arguments=None): sys.exit(0) # http://stackoverflow.com/questions/16878315/what-is-the-right-way-to-treat-python-argparse-namespace-as-a-dictionary - arguments = vars(arguments) - - return arguments + arguments = vars(arguments) + + return {k: v for k, v in arguments.items() if v is not None} def main(argv=sys.argv): ''' NeuroNER main method diff --git a/neuroner/neuromodel.py b/neuroner/neuromodel.py index b1689bab..99fa67e5 100644 --- a/neuroner/neuromodel.py +++ b/neuroner/neuromodel.py @@ -97,63 +97,6 @@ def _fetch(name,content_type=None): msg = "{} not found in {} package.".format(name,package_name) print(msg) - # if pkg_resources.resource_isdir(package_name, resource_path): - - # # get list of files - # files = pkg_resources.resource_listdir(package_name, resource_path) - # fileset = {} - - # # load data - # for f in files: - # resource_path = '/'.join((content_type,name,f)) - # fileset[f] = pkg_resources.resource_string(package_name, - # resource_path) - - # # create containing dir - # container_dir = os.path.join('.',content_type,name) - - # # write to local dir - # if os.path.isdir(container_dir): - # msg = "Directory '{}' already exists.".format(container_dir) - # print(msg) - # else: - # _make_local_dir(container_dir) - # for f,contents in fileset.items(): - # _write_file(os.path.join(container_dir,f),contents) - - # else: - # msg = "{} not found in {} package.".format(name,package_name) - # print(msg) - -# def _make_local_dir(path): -# """ -# Make any required local directories to prepare for downloading. - -# Args: -# path (str): name of directory to create -# """ -# if os.path.isdir(path): -# msg = "Directory '{}' already exists.".format(path) -# warnings.warn(msg) -# else: -# os.makedirs(path) -# print('Created directory: {}'.format(path)) - -# def _write_file(filename,content): -# """ -# Write to file. Assumes directory already exists. - -# Args: -# filename (str): full path to file -# content (obj): data to write to file. -# """ -# if os.path.isfile(filename): -# msg = "File '{}' already exists.".format(filename) -# warnings.warn(msg) -# else: -# with open(filename, 'wb') as f: -# f.write(content) - def _get_default_param(): """ Get the default parameters. @@ -255,46 +198,53 @@ def _clean_param_dtypes(param): def load_parameters(**kwargs): ''' - Load parameters from the ini file if specified, take into account any command + Load parameters from the ini file if specified, take into account any command line argument, and ensure that each parameter is cast to the correct type. - + Command line arguments take precedence over parameters specified in the parameter file. ''' - param = _get_default_param() + param = {} + param_default = _get_default_param() # use parameter path if provided, otherwise use default - try: - parameters_filepath = kwargs['parameters_filepath'] + try: + if kwargs['parameters_filepath']: + parameters_filepath = kwargs['parameters_filepath'] except: - parameters_filepath = param['parameters_filepath'] + parameters_filepath = param_default['parameters_filepath'] param_config, param_file_txt = _get_config_param(parameters_filepath) # Parameter file settings should overwrite default settings - for k,v in param_config.items(): - param[k] = v + for k, v in param_config.items(): + param[k] = v # Command line args should overwrite settings in the parameter file - for k,v in kwargs.items(): + for k, v in kwargs.items(): param[k] = v - # if loading a pretrained model, set to pretrain hyperparameters + # Any missing args can be set to default + for k, v in param_default.items(): + if k not in param: + param[k] = param_default[k] + + # if loading a pretrained model, set to pretrain hyperparameters if param['use_pretrained_model']: - pretrain_path = os.path.join(param['pretrained_model_folder'], + pretrain_path = os.path.join(param['pretrained_model_folder'], 'parameters.ini') if os.path.isfile(pretrain_path): pretrain_param, _ = _get_config_param(pretrain_path) - - pretrain_list = ['use_character_lstm', 'character_embedding_dimension', - 'character_lstm_hidden_state_dimension', 'token_embedding_dimension', + + pretrain_list = ['use_character_lstm', 'character_embedding_dimension', + 'character_lstm_hidden_state_dimension', 'token_embedding_dimension', 'token_lstm_hidden_state_dimension', 'use_crf'] for name in pretrain_list: if str(param[name]) != str(pretrain_param[name]): - msg = """WARNING: parameter '{0}' was overwritten from '{1}' to '{2}' - for consistency with the pretrained model""".format(name, + msg = """WARNING: parameter '{0}' was overwritten from '{1}' to '{2}' + for consistency with the pretrained model""".format(name, param[name], pretrain_param[name]) print(msg) param[name] = pretrain_param[name] @@ -481,8 +431,8 @@ class NeuroNER(object): prediction_count = 0 - def __init__(self,**kwargs): - + def __init__(self, **kwargs): + # Set parameters self.parameters, self.conf_parameters = load_parameters(**kwargs) self.dataset_filepaths, self.dataset_brat_folders = self._get_valid_dataset_filepaths(self.parameters) @@ -491,7 +441,7 @@ def __init__(self,**kwargs): # Load dataset self.modeldata = dataset.Dataset(verbose=self.parameters['verbose'], debug=self.parameters['debug']) token_to_vector = self.modeldata.load_dataset(self.dataset_filepaths, self.parameters) - + # Launch session. Automatically choose a device # if the specified one doesn't exist session_conf = tf.ConfigProto( @@ -533,17 +483,6 @@ def _create_stats_graph_folder(self, parameters): stats_graph_folder = os.path.join(parameters['output_folder'], model_name) utils.create_folder_if_not_exists(stats_graph_folder) return stats_graph_folder, experiment_timestamp - - def _load_parameters(self,**kwargs): - """ - Load parameters from the ini file if specified, take into account any command line argument, - and ensure that each parameter is cast to the correct type. - - Command line arguments take precedence over parameters specified in the parameter file. - """ - param, param_file_txt = load_parameters(**kwargs) - - return param, param_file_txt def _get_valid_dataset_filepaths(self, parameters, dataset_types=['train', 'valid', 'test', 'deploy']): """ diff --git a/setup.py b/setup.py index 95e8862e..25745fbf 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ # Versions should comply with PEP440. For a discussion on single-sourcing # the version across setup.py and the project code, see # https://packaging.python.org/en/latest/single_source_version.html - version='1.0-dev3', + version='1.0.4', description='NeuroNER', long_description=long_description,