Removed unused parameters and synchronised variable names (#273)
IanGrimstead authored and committed on May 20, 2019
1 parent e053907 commit b851109
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 13 deletions.

pygrams.py: 12 changes (6 additions & 6 deletions)

@@ -164,15 +164,15 @@ def main(supplied_args):
     doc_source_file_name = os.path.join(args.path, args.doc_source)

     if args.input_tfidf is None:
-        pickled_tf_idf_path = None
+        pickled_base_file_name = None
     else:
-        pickled_tf_idf_path = os.path.join('outputs', 'tfidf', args.input_tfidf)
+        pickled_base_file_name = os.path.join('outputs', 'tfidf', args.input_tfidf)

     pipeline = Pipeline(doc_source_file_name, docs_mask_dict, pick_method=args.pick,
-                        ngram_range=(args.min_ngrams, args.max_ngrams), normalize_rows=args.normalize_doc_length,
-                        text_header=args.text_header, max_df=args.max_document_frequency,
-                        term_counts=args.term_counts, user_ngrams=args.search_terms, terms_threshold=args.search_terms_threshold,
-                        prefilter_terms=args.prefilter_terms, pickled_base_file_name=pickled_tf_idf_path,
+                        ngram_range=(args.min_ngrams, args.max_ngrams), text_header=args.text_header,
+                        term_counts=args.term_counts, pickled_base_file_name=pickled_base_file_name,
+                        max_df=args.max_document_frequency, user_ngrams=args.search_terms,
+                        prefilter_terms=args.prefilter_terms, terms_threshold=args.search_terms_threshold,
                         output_name=args.outputs_name, emerging_technology=args.emerging_technology)

     pipeline.output(outputs, wordcloud_title=args.wordcloud_title, outname=args.outputs_name,

scripts/pipeline.py: 7 changes (3 additions & 4 deletions)

@@ -22,10 +22,9 @@


 class Pipeline(object):
-    def __init__(self, data_filename, docs_mask_dict, pick_method='sum', ngram_range=(1, 3),
-                 normalize_rows=False, text_header='abstract', term_counts=False,
-                 pickled_base_file_name=None, max_df=0.1, user_ngrams=None, prefilter_terms=0, terms_threshold=None,
-                 output_name=None, emerging_technology=None):
+    def __init__(self, data_filename, docs_mask_dict, pick_method='sum', ngram_range=(1, 3), text_header='abstract',
+                 term_counts=False, pickled_base_file_name=None, max_df=0.1, user_ngrams=None, prefilter_terms=0,
+                 terms_threshold=None, output_name=None, emerging_technology=None):

         # load data
         self.__data_filename = data_filename

tests/test_tfidf_reduce.py: 5 changes (2 additions & 3 deletions)

@@ -34,9 +34,8 @@ def setUpClass(cls):

         filename = os.path.join('tests', 'data', 'USPTO-random-100.csv')

-        cls.__pipeline = Pipeline(filename, docs_mask_dict, ngram_range=ngram_range,
-                                  text_header='abstract', term_counts=True,
-                                  max_df=max_df, output_name='test')
+        cls.__pipeline = Pipeline(filename, docs_mask_dict, ngram_range=ngram_range, text_header='abstract',
+                                  term_counts=True, max_df=max_df, output_name='test')

         cls.__term_score_tuples = cls.__pipeline.term_score_tuples

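
For callers, the net effect of this commit is that Pipeline no longer accepts normalize_rows and the pygrams.py local variable now matches the pickled_base_file_name keyword it feeds. Below is a minimal sketch of the updated call pattern, assuming the repository root is on the import path; the empty docs_mask_dict and the test CSV path are illustrative choices, not something this commit prescribes.

import os

from scripts.pipeline import Pipeline

# Illustrative placeholder only; real masks are built from pygrams argument parsing.
docs_mask_dict = {}

# Renamed from pickled_tf_idf_path in this commit; None mirrors pygrams.py when args.input_tfidf is None.
pickled_base_file_name = None

pipeline = Pipeline(os.path.join('tests', 'data', 'USPTO-random-100.csv'), docs_mask_dict,
                    ngram_range=(1, 3), text_header='abstract', term_counts=True,
                    pickled_base_file_name=pickled_base_file_name, max_df=0.1,
                    output_name='example')

# The removed keyword now fails fast:
# Pipeline(filename, docs_mask_dict, normalize_rows=True)
#   -> TypeError: __init__() got an unexpected keyword argument 'normalize_rows'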
