Merge pull request #334 from nickjcroucher/fix_tests

Fixes to ensure code passes CI tests
nickjcroucher · Mar 2, 2022 · a9d5dcd
2 parents 51ef916 + 2f9f80c
commit a9d5dcd

Showing 12 changed files with 167 additions and 152 deletions.
diff --git a/README.md b/README.md
@@ -81,7 +81,7 @@ autoreconf -i
 make
 [sudo] make install
 cd python
-[sudo] python3 setup.py install
+[sudo] python3 -m pip install .
 ```
 Use `sudo` to install Gubbins system-wide. If you don't have the permissions, run `configure` with a prefix to install Gubbins in your home directory.
 

diff --git a/python/gubbins/pyjar.py b/python/gubbins/pyjar.py
@@ -584,8 +584,7 @@ def get_base_patterns(prefix, verbose, threads = 1):
     with open(base_positions_fn, 'r') as positions_file:
         for line in positions_file:
             array_of_position_arrays.append(list(map(int,line.rstrip().split(','))))
-    array_max = max([sublist[-1] for sublist in array_of_position_arrays]) + 1
-
+    array_max = max([max(sublist) for sublist in array_of_position_arrays]) + 1
     # Record timing
 
     t2=time.process_time()
@@ -646,12 +645,8 @@ def reconstruct_alignment_column(column_indices,
 
 
     # Extract information for iterations
-    if threads == 1:
-        columns = base_patterns
-        column_positions = base_pattern_positions
-    else:
-        column_positions = convert_to_square_numpy_array(base_pattern_positions)
-        columns = base_patterns[column_indices]
+    column_positions = convert_to_square_numpy_array(base_pattern_positions)
+    columns = base_patterns[column_indices]
 
 
     ### TIMING

diff --git a/python/gubbins/tests/data/multiple_recombinations.recombination_predictions.embl b/python/gubbins/tests/data/multiple_recombinations.recombination_predictions.embl
@@ -0,0 +1,30 @@
+FT   misc_feature    29..49
+FT                   /node="Node_4->Node_3"
+FT                   /neg_log_likelihood="4.955311"
+FT                   /colour="2"
+FT                   /taxa="  sequence_7 sequence_9 sequence_8"
+FT                   /SNP_count="21"
+FT   misc_feature    51..84
+FT                   /node="Node_5->Node_4"
+FT                   /neg_log_likelihood="10.195830"
+FT                   /colour="2"
+FT                   /taxa=" sequence_6   sequence_7 sequence_9 sequence_8"
+FT                   /SNP_count="30"
+FT   misc_feature    51..84
+FT                   /node="Node_6->sequence_1"
+FT                   /neg_log_likelihood="8.046578"
+FT                   /colour="4"
+FT                   /taxa="sequence_1"
+FT                   /SNP_count="30"
+FT   misc_feature    124..201
+FT                   /node="Node_9->sequence_10"
+FT                   /neg_log_likelihood="40.155361"
+FT                   /colour="4"
+FT                   /taxa="sequence_10"
+FT                   /SNP_count="78"
+FT   misc_feature    51..84
+FT                   /node="Node_9->Node_8"
+FT                   /neg_log_likelihood="10.195830"
+FT                   /colour="2"
+FT                   /taxa="  sequence_3 sequence_4  sequence_2   sequence_5  sequence_6   sequence_7 sequence_9 sequence_8 sequence_1"
+FT                   /SNP_count="30"
diff --git a/python/gubbins/tests/data/preprocessfasta/fasta_list_creator.sh b/python/gubbins/tests/data/preprocessfasta/fasta_list_creator.sh
diff --git a/python/gubbins/tests/data/preprocessfasta/ska_fasta_list.txt b/python/gubbins/tests/data/preprocessfasta/ska_fasta_list.txt
diff --git a/python/gubbins/tests/data/test_valid_output.csv b/python/gubbins/tests/data/test_valid_output.csv
@@ -1,5 +1,5 @@
-isolate,a,A,t,T,c,C,g,G,N,gap
-sequence1,0,10,0,0,0,0,0,0,0,0
-sequence2,0,0,0,0,0,10,0,0,0,0
-sequence3,0,0,0,0,0,0,0,10,0,0
-sequence4,0,0,0,10,0,0,0,0,0,0
+isolate,A,C,G,T
+sequence1,10,0,0,0
+sequence2,0,10,0,0
+sequence3,0,0,10,0
+sequence4,0,0,0,10
diff --git a/python/gubbins/tests/test_dependencies.py b/python/gubbins/tests/test_dependencies.py
@@ -14,6 +14,7 @@
 import shutil
 from gubbins import common, run_gubbins
 
+unittest.TestLoader.sortTestMethodsUsing = None
 modules_dir = os.path.dirname(os.path.abspath(common.__file__))
 data_dir = os.path.join(modules_dir, 'tests', 'data')
 working_dir = os.path.join(modules_dir, 'tests')
@@ -38,7 +39,7 @@ def test_pairwise(self):
                                                     "--threads", "1",
                                                     os.path.join(data_dir, 'pairwise.aln')]))
         exit_code = self.check_for_output_files('pairwise')
-        self.cleanup('multiple_recombinations')
+        self.cleanup('pairwise')
         assert exit_code == 0
 
     # Test individual tree builders
@@ -108,13 +109,13 @@ def test_rapidnj(self):
                                                     os.path.join(data_dir, 'multiple_recombinations.aln')]))
         exit_code = self.check_for_output_files('multiple_recombinations')
         # Copy file for subsequent tests
-        shutil.copyfile(os.path.join('multiple_recombinations.recombination_predictions.embl'),
-                        os.path.join('new_rapidnj_jc_output.recombination_predictions.embl'))
+        shutil.copyfile(os.path.join(data_dir, 'multiple_recombinations.recombination_predictions.embl'),
+                        os.path.join(data_dir, 'new_rapidnj_jc_output.recombination_predictions.embl'))
         self.cleanup('multiple_recombinations')
         assert exit_code == 0
 
     def check_rapidnj_consistency(self):
-        new_file = 'new_rapidnj_jc_output.recombination_predictions.embl'
+        new_file = os.path.join(data_dir,'new_rapidnj_jc_output.recombination_predictions.embl')
         reference_file = os.path.join(data_dir,'ref_rapidnj_jc_output.recombination_predictions.embl')
         assert common.have_recombinations_been_seen_before(reference_file,[new_file])
 
@@ -140,8 +141,8 @@ def test_defined_mislabelled_starting_tree(self):
                                                 "--verbose", "--iterations", "3",
                                                 "--threads", "1",
                                                 os.path.join(data_dir, 'mislabelled.multiple_recombinations.aln')]))
-        exit_code = self.check_for_output_files('multiple_recombinations')
-        self.cleanup('multiple_recombinations')
+        exit_code = self.check_for_output_files('mislabelled.multiple_recombinations')
+        self.cleanup('mislabelled.multiple_recombinations')
         assert exit_code == 0
 
     # Test initial star tree
@@ -459,7 +460,7 @@ def test_converge_on_rec(self):
                                                     "--converge-method", "recombination",
                                                     os.path.join(data_dir, 'multiple_recombinations.aln')]))
         exit_code = self.check_for_output_files('multiple_recombinations')
-        self.cleanup('bootstrapping_test')
+        self.cleanup('multiple_recombinations')
         assert exit_code == 0
 
     def test_converge_on_unweighted_rf(self):
@@ -471,7 +472,7 @@ def test_converge_on_unweighted_rf(self):
                                                     "--converge-method", "robinson_foulds",
                                                     os.path.join(data_dir, 'multiple_recombinations.aln')]))
         exit_code = self.check_for_output_files('multiple_recombinations')
-        self.cleanup('bootstrapping_test')
+        self.cleanup('multiple_recombinations')
         assert exit_code == 0
 
     # Test renaming of final output
@@ -507,7 +508,7 @@ def check_for_output_files(prefix):
 
     @staticmethod
     def cleanup(prefix):
-        os.chdir(working_dir)
+        #os.chdir(working_dir)
         regex_to_remove = prefix + ".*"
         for file in glob.glob(regex_to_remove):
             os.remove(file)
@@ -519,4 +520,4 @@ def cleanup(prefix):
                 os.rmdir(dir)
 
 if __name__ == "__main__":
-    unittest.main(buffer=True)
+    unittest.main(buffer=True)
diff --git a/python/gubbins/tests/test_python_scripts.py b/python/gubbins/tests/test_python_scripts.py
@@ -26,7 +26,7 @@ def test_alignment_checker(self):
         output_file = os.path.join(working_dir, "valid_alignment_test")
         output_csv = os.path.join(working_dir, "valid_alignment_test.csv")
         test_csv = os.path.join(data_dir, "test_valid_output.csv")
-        aln_cmd = "alignment_checker.py --aln " + small_aln + " --out " + output_file 
+        aln_cmd = "gubbins_alignment_checker.py --aln " + small_aln + " --out " + output_file
         subprocess.check_call(aln_cmd, shell=True)
         assert self.md5_check(output_csv, test_csv)
         os.remove(output_csv)
@@ -46,6 +46,7 @@ def test_clade_extraction(self):
         test_aln = os.path.join(data_dir, "multiple_recombinations_clade_extract.aln")
         test_gff = os.path.join(data_dir, "multiple_recombinations_clade_extract.gff")
         test_tree = os.path.join(data_dir, "multiple_recombinations_clade_extract.tree")
+        # Build the shell command for extract_gubbins_clade.py (invoked by bare name, so it must be on PATH)
         extract_clade_cmd = "extract_gubbins_clade.py --list " + clade_list + " --aln " + multiple_aln +\
             " --gff " + multiple_gff + " --tree " + multiple_tree + " --out " + base_path +  " --out-fmt fasta"
         subprocess.check_call(extract_clade_cmd, shell=True)
@@ -63,7 +64,7 @@ def test_masking_aln(self):
         out_aln = os.path.join(data_dir, "multiple_recombinations_mask.aln")
         ## Get the test file 
         test_aln = os.path.join(data_dir, "masking_multiple.aln")
-
+        # Build the shell command for mask_gubbins_aln.py (invoked by bare name, so it must be on PATH)
         extract_clade_cmd = "mask_gubbins_aln.py --aln " + multiple_aln +\
             " --gff " + multiple_gff + " --out " + out_aln +  " --out-fmt fasta"
         subprocess.check_call(extract_clade_cmd, shell=True)
@@ -73,14 +74,15 @@ def test_masking_aln(self):
     ## Test the ska alignment generator 
     def test_generate_ska_alignment(self):
         exit_code = 1
-        ## Get files to run initial ska alignment on via the bash script 
-        bash_script = os.path.join(preprocess_dir, 'fasta_list_creator.sh')
-        fasta_creator = "bash " + bash_script + " " + preprocess_dir
-        subprocess.check_call(fasta_creator, shell=True)
         ## Run the generate_ska_alignment script
-        fasta_loc = 'ska_fasta_list.txt'
+        fasta_loc = os.path.join(preprocess_dir, './ska_fasta_list.txt')
+        with open(fasta_loc,'w') as list_file:
+            for i in range(1,5):
+                list_file.write('sequence_t' + str(i) + '\t' + \
+                    os.path.join(preprocess_dir,'sequence_t' + str(i) + '.fasta\n'))
         ref_seq = os.path.join(preprocess_dir, 'sequence_t1.fasta')
         aln_out = os.path.join(preprocess_dir, 'ska_test_aln.aln')
+        # Build the shell command for generate_ska_alignment.py (invoked by bare name, so it must be on PATH)
         ska_cmd = "generate_ska_alignment.py --fasta " + fasta_loc +\
             " --reference " + ref_seq + " --out " + aln_out +\
                 " --k 6"
@@ -96,8 +98,6 @@ def test_generate_ska_alignment(self):
         os.remove(fasta_loc)
         assert exit_code == 0
 
-
-
     @staticmethod
     def check_for_output_files(prefix):
         assert os.path.exists(prefix + '.summary_of_snp_distribution.vcf')

diff --git a/python/scripts/alignment_checker.py b/python/scripts/alignment_checker.py
diff --git a/python/scripts/generate_ska_alignment.py b/python/scripts/generate_ska_alignment.py
@@ -82,6 +82,8 @@ def ska_map_sequences(seq, k = None, ref = None):
 # main code
 if __name__ == "__main__":
 
+    __spec__ = None
+
     # Get command line options
     args = get_options()
 
@@ -133,7 +135,7 @@ def ska_map_sequences(seq, k = None, ref = None):
                                             k = args.k,
                                             names = seq_names),
                                             fastq_names)
-    
+
     # Map sequences
     with Pool(processes = args.threads) as pool:
         return_codes = pool.map(partial(ska_map_sequences,