Skip to content

Commit

Permalink
pickle-depickle tfidf test now represents different executions (#223)
Browse files Browse the repository at this point in the history
WordAnalyzer is reset between calls to main() - this will catch cases where
stopwords etc. are not populated
  • Loading branch information
IanGrimstead authored and IanGrimstead committed Mar 29, 2019
1 parent 7a89cb6 commit 1d1cbe2
Showing 1 changed file with 8 additions and 0 deletions.
8 changes: 8 additions & 0 deletions tests/test_pygrams.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import pygrams
from scripts import FilePaths
from scripts.text_processing import WordAnalyzer
from scripts.utils.pygrams_exception import PygramsException


Expand Down Expand Up @@ -227,6 +228,13 @@ def test_simple_output_tfidf_pickle_and_unpickle(self, mock_path_isfile, mock_ou
args = ['-ds', self.data_source_name, '--date_header', 'publication_date', '--max_document_frequency', '1.0']
pygrams.main(args)

# Reset WordAnalyzer's class-level (static) state so that a later pygrams.main() call behaves like a separate execution
WordAnalyzer.tokenizer = None
WordAnalyzer.preprocess = None
WordAnalyzer.ngram_range = None
WordAnalyzer.stemmed_stop_word_set_n = None
WordAnalyzer.stemmed_stop_word_set_uni = None

# Fail if original data frame is requested from disc
def factory_read_pickle_fake(pickle_file_name):
self.fail(f'Should not be reading {pickle_file_name} via a factory if TFIDF was requested from pickle')
Expand Down

0 comments on commit 1d1cbe2

Please sign in to comment.