Removed unused parameters and synchronised variable names (#273)
IanGrimstead authored and committed on May 20, 2019
1 parent e053907 commit b851109
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 13 deletions.

pygrams.py: 12 changes (6 additions & 6 deletions)

@@ -164,15 +164,15 @@ def main(supplied_args):
     doc_source_file_name = os.path.join(args.path, args.doc_source)

     if args.input_tfidf is None:
-        pickled_tf_idf_path = None
+        pickled_base_file_name = None
     else:
-        pickled_tf_idf_path = os.path.join('outputs', 'tfidf', args.input_tfidf)
+        pickled_base_file_name = os.path.join('outputs', 'tfidf', args.input_tfidf)

     pipeline = Pipeline(doc_source_file_name, docs_mask_dict, pick_method=args.pick,
-                        ngram_range=(args.min_ngrams, args.max_ngrams), normalize_rows=args.normalize_doc_length,
-                        text_header=args.text_header, max_df=args.max_document_frequency,
-                        term_counts=args.term_counts, user_ngrams=args.search_terms, terms_threshold=args.search_terms_threshold,
-                        prefilter_terms=args.prefilter_terms, pickled_base_file_name=pickled_tf_idf_path,
+                        ngram_range=(args.min_ngrams, args.max_ngrams), text_header=args.text_header,
+                        term_counts=args.term_counts, pickled_base_file_name=pickled_base_file_name,
+                        max_df=args.max_document_frequency, user_ngrams=args.search_terms,
+                        prefilter_terms=args.prefilter_terms, terms_threshold=args.search_terms_threshold,
                         output_name=args.outputs_name, emerging_technology=args.emerging_technology)

     pipeline.output(outputs, wordcloud_title=args.wordcloud_title, outname=args.outputs_name,

scripts/pipeline.py: 7 changes (3 additions & 4 deletions)

@@ -22,10 +22,9 @@


 class Pipeline(object):
-    def __init__(self, data_filename, docs_mask_dict, pick_method='sum', ngram_range=(1, 3),
-                 normalize_rows=False, text_header='abstract', term_counts=False,
-                 pickled_base_file_name=None, max_df=0.1, user_ngrams=None, prefilter_terms=0, terms_threshold=None,
-                 output_name=None, emerging_technology=None):
+    def __init__(self, data_filename, docs_mask_dict, pick_method='sum', ngram_range=(1, 3), text_header='abstract',
+                 term_counts=False, pickled_base_file_name=None, max_df=0.1, user_ngrams=None, prefilter_terms=0,
+                 terms_threshold=None, output_name=None, emerging_technology=None):

         # load data
         self.__data_filename = data_filename

tests/test_tfidf_reduce.py: 5 changes (2 additions & 3 deletions)

@@ -34,9 +34,8 @@ def setUpClass(cls):

         filename = os.path.join('tests', 'data', 'USPTO-random-100.csv')

-        cls.__pipeline = Pipeline(filename, docs_mask_dict, ngram_range=ngram_range,
-                                  text_header='abstract', term_counts=True,
-                                  max_df=max_df, output_name='test')
+        cls.__pipeline = Pipeline(filename, docs_mask_dict, ngram_range=ngram_range, text_header='abstract',
+                                  term_counts=True, max_df=max_df, output_name='test')

         cls.__term_score_tuples = cls.__pipeline.term_score_tuples

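
For callers, the net effect of this commit is that Pipeline no longer accepts normalize_rows and the pygrams.py local variable now matches the pickled_base_file_name keyword it feeds. Below is a minimal sketch of the updated call pattern, assuming the repository root is on the import path; the empty docs_mask_dict and the test CSV path are illustrative choices, not something this commit prescribes.

import os

from scripts.pipeline import Pipeline

# Illustrative placeholder only; real masks are built from pygrams argument parsing.
docs_mask_dict = {}

# Renamed from pickled_tf_idf_path in this commit; None mirrors pygrams.py when args.input_tfidf is None.
pickled_base_file_name = None

pipeline = Pipeline(os.path.join('tests', 'data', 'USPTO-random-100.csv'), docs_mask_dict,
                    ngram_range=(1, 3), text_header='abstract', term_counts=True,
                    pickled_base_file_name=pickled_base_file_name, max_df=0.1,
                    output_name='example')

# The removed keyword now fails fast:
# Pipeline(filename, docs_mask_dict, normalize_rows=True)
#   -> TypeError: __init__() got an unexpected keyword argument 'normalize_rows'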
