From c6cae377b4128f0f38b323cc93b404b02e90b117 Mon Sep 17 00:00:00 2001 From: Alex Lancaster Date: Mon, 22 Jan 2024 14:14:01 -0500 Subject: [PATCH 1/8] add tool.pytest.ini_options for repo_review --- pyproject.toml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 92cc09bb4..cc2390cf8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,13 @@ +[tool.pytest.ini_options] +minversion = "6.0" +addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] +xfail_strict = true +filterwarnings = ["error"] +log_cli_level = "info" +testpaths = [ + "tests", +] + [tool.cibuildwheel] skip = ["*-win32", "*_i686", # skip 32-bit builds "pp37-*", "pp310-*", # skip certain PyPy configurations From fafd6b5cbe6ecee56ecbef281e61b9dbbbdea7d8 Mon Sep 17 00:00:00 2001 From: Alex Lancaster Date: Mon, 22 Jan 2024 16:25:36 -0500 Subject: [PATCH 2/8] ensure all file handles are closed --- src/PyPop/ParseFile.py | 4 ++-- tests/test_100k_Emhaplofreq.py | 3 ++- tests/test_AlleleColon.py | 6 ++++-- tests/test_GenotypeCommon.py | 6 ++++-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/PyPop/ParseFile.py b/src/PyPop/ParseFile.py index 8daba3719..5a3a8d963 100644 --- a/src/PyPop/ParseFile.py +++ b/src/PyPop/ParseFile.py @@ -226,8 +226,8 @@ def _sampleFileRead(self, filename): *For internal use only*. """ - f = open(filename, 'r') - self.fileData = f.readlines() + with open(filename, 'r') as f: + self.fileData = f.readlines() def _mapPopHeaders(self): diff --git a/tests/test_100k_Emhaplofreq.py b/tests/test_100k_Emhaplofreq.py index ed6fc38ed..e506b8f6b 100644 --- a/tests/test_100k_Emhaplofreq.py +++ b/tests/test_100k_Emhaplofreq.py @@ -16,5 +16,6 @@ def test_100k_Emhaplofreq(): assert exit_code == 0 # compare with md5sum of output file # FIXME: disable for the moment - # assert hashlib.md5(open("Test_100k_20loci_Dataset-out.txt", 'rb').read()).hexdigest() == 'd899396e90d99ad9eb2002506e699091' + # with open("Test_100k_20loci_Dataset-out.txt", 'rb') as out_handle: + # assert hashlib.md5(out_handle.read()).hexdigest() == 'd899396e90d99ad9eb2002506e699091' diff --git a/tests/test_AlleleColon.py b/tests/test_AlleleColon.py index 4f3cd4da5..f332026fc 100644 --- a/tests/test_AlleleColon.py +++ b/tests/test_AlleleColon.py @@ -8,11 +8,13 @@ def test_AlleleColon_HardyWeinberg(): # check exit code assert exit_code == 0 # compare with md5sum of output file - assert hashlib.md5(open("Test_Allele_Colon_HardyWeinberg-out.txt", 'rb').read()).hexdigest() == '245a8a8493506c0b65ba9a3469173b13' + with open("Test_Allele_Colon_HardyWeinberg-out.txt", 'rb') as out_handle: + assert hashlib.md5(out_handle.read()).hexdigest() == '245a8a8493506c0b65ba9a3469173b13' def test_AlleleColon_Emhaplofreq(): exit_code = run_pypop_process('./tests/data/Test_Allele_Colon_Emhaplofreq.ini', './tests/data/Test_Allele_Colon_Emhaplofreq.pop') # check exit code assert exit_code == 0 # compare with md5sum of output file - assert hashlib.md5(open("Test_Allele_Colon_Emhaplofreq-out.txt", 'rb').read()).hexdigest() == 'dc9b6530a8d85e0d4cf86cecf6b0a9c9' + with open("Test_Allele_Colon_Emhaplofreq-out.txt", 'rb') as out_handle: + assert hashlib.md5(out_handle.read()).hexdigest() == 'dc9b6530a8d85e0d4cf86cecf6b0a9c9' diff --git a/tests/test_GenotypeCommon.py b/tests/test_GenotypeCommon.py index b126cf408..de4222e77 100644 --- a/tests/test_GenotypeCommon.py +++ b/tests/test_GenotypeCommon.py @@ -8,11 +8,13 @@ def test_GenotypeCommon_HardyWeinberg(): # check exit code assert exit_code == 0 # compare with md5sum of output file - assert hashlib.md5(open("BIGDAWG_SynthControl_Data-out.txt", 'rb').read()).hexdigest() == 'db4bc1113e9eab337561f7510e73381f' + with open("BIGDAWG_SynthControl_Data-out.txt", 'rb') as out_handle: + assert hashlib.md5(out_handle.read()).hexdigest() == 'db4bc1113e9eab337561f7510e73381f' def test_GenotypeCommonDash_HardyWeinberg(): exit_code = run_pypop_process('./tests/data/WS_BDCtrl_Test_HW.ini', './tests/data/BIGDAWG_SynthControl_Data_dash.pop') # check exit code assert exit_code == 0 # compare with md5sum of output file - assert hashlib.md5(open("BIGDAWG_SynthControl_Data_dash-out.txt", 'rb').read()).hexdigest() == '36053392f9dd25c9a2a6bb1fc6db242a' + with open("BIGDAWG_SynthControl_Data_dash-out.txt", 'rb') as out_handle: + assert hashlib.md5(out_handle.read()).hexdigest() == '36053392f9dd25c9a2a6bb1fc6db242a' From 1675d34ca8c28271b01656e637bf29c1d3e6c83d Mon Sep 17 00:00:00 2001 From: Alex Lancaster Date: Mon, 22 Jan 2024 19:09:39 -0500 Subject: [PATCH 3/8] only import parts of numpy that are needed --- src/PyPop/Haplo.py | 44 ++++++++++++++++++++++---------------------- src/PyPop/Utils.py | 8 +++++--- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/PyPop/Haplo.py b/src/PyPop/Haplo.py index c949bbf9a..248af65b0 100644 --- a/src/PyPop/Haplo.py +++ b/src/PyPop/Haplo.py @@ -38,9 +38,9 @@ """ import sys, os, re, io -import numpy import math import itertools as it +from numpy import unique, array, minimum, sqrt, random, nan, c_ from tempfile import TemporaryDirectory from PyPop.Arlequin import ArlequinBatch @@ -666,8 +666,8 @@ def _compute_LD(haplos, freqs, compute_ALD=False, debug=False): Make standalone so it can be used by any class """ - unique_alleles1 = numpy.unique(haplos[:,0]) - unique_alleles2 = numpy.unique(haplos[:,1]) + unique_alleles1 = unique(haplos[:,0]) + unique_alleles2 = unique(haplos[:,1]) # FIXME: should merge the two into one loop freq1_dict = {} @@ -704,7 +704,7 @@ def _compute_LD(haplos, freqs, compute_ALD=False, debug=False): allhaplos.append(newrow) # convert to numpy structured array - allhaplos = numpy.array(allhaplos, dtype=[('allele1', 'O'), ('allele2', 'O'), ('allele.freq1', float), ('allele.freq2', float), ('haplo.freq', float)]) + allhaplos = array(allhaplos, dtype=[('allele1', 'O'), ('allele2', 'O'), ('allele.freq1', float), ('allele.freq2', float), ('haplo.freq', float)]) # now we extract the columns we need for the computations hap_prob = allhaplos['haplo.freq'] @@ -717,11 +717,11 @@ def _compute_LD(haplos, freqs, compute_ALD=False, debug=False): num_allpossible_haplos = len(allhaplos) ## compute Wn & Dprime - zero = numpy.array([0.0]) + zero = array([0.0]) dprime_den = zero.repeat(num_allpossible_haplos) d_ij = hap_prob - a_freq1 * a_freq2 - den_lt0 = numpy.minimum( a_freq1*a_freq2, (1-a_freq1)*(1-a_freq2) ) - den_ge0 = numpy.minimum( (1-a_freq1)*a_freq2, a_freq1*(1-a_freq2) ) + den_lt0 = minimum( a_freq1*a_freq2, (1-a_freq1)*(1-a_freq2) ) + den_ge0 = minimum( (1-a_freq1)*a_freq2, a_freq1*(1-a_freq2) ) dprime_den[d_ij < 0] = den_lt0[d_ij < 0] dprime_den[d_ij >=0] = den_ge0[d_ij >=0] dprime_ij = d_ij/dprime_den @@ -741,8 +741,8 @@ def _compute_LD(haplos, freqs, compute_ALD=False, debug=False): w = w_ij.sum() # FIXME: NOT SURE THIS SYNTAX FOR 'min' IS CORRECT (OR GOOD) # WANT: wn <- sqrt( w / (min( length(unique(alleles1)), length(unique(alleles2)) ) - 1.0) ) - w_den = numpy.minimum(numpy.unique(alleles1).size*1.0, numpy.unique(alleles2).size*1.0) - 1.0 - wn = numpy.sqrt( w / w_den ) + w_den = minimum(unique(alleles1).size*1.0, unique(alleles2).size*1.0) - 1.0 + wn = sqrt( w / w_den ) if debug: print ("Wn: ", wn) if compute_ALD: @@ -751,23 +751,23 @@ def _compute_LD(haplos, freqs, compute_ALD=False, debug=False): F_2_1 = 0.0 F_2 = 0.0 F_1_2 = 0.0 - for i in numpy.unique(alleles1): - af_1 = numpy.unique(a_freq1[alleles1==i])[0] # take the first element of ndarray (default behaviour) + for i in unique(alleles1): + af_1 = unique(a_freq1[alleles1==i])[0] # take the first element of ndarray (default behaviour) F_1 = F_1 + af_1**2 F_2_1 = F_2_1 + ((hap_prob[alleles1==i]**2)/af_1).sum() - for i in numpy.unique(alleles2): - af_2 = numpy.unique(a_freq2[alleles2==i])[0] + for i in unique(alleles2): + af_2 = unique(a_freq2[alleles2==i])[0] F_2 = F_2 + af_2**2 F_1_2 = F_1_2 + ((hap_prob[alleles2==i]**2)/af_2).sum() if F_2 == 1.0: - F_2_1_prime = numpy.nan - ALD_2_1 = numpy.nan + F_2_1_prime = nan + ALD_2_1 = nan else: F_2_1_prime = (F_2_1 - F_2)/(1 - F_2) ALD_2_1 = math.sqrt(F_2_1_prime) if F_1 == 1: - F_1_2_prime = numpy.nan - ALD_1_2 = numpy.nan + F_1_2_prime = nan + ALD_1_2 = nan else: F_1_2_prime = (F_1_2 - F_1)/(1 - F_1) ALD_1_2 = math.sqrt(F_1_2_prime) @@ -890,7 +890,7 @@ def estHaplotypes(self, iseed1 = 18717; iseed2= 16090; iseed3=14502 random_start = 0 else: - seed_array = numpy.random.random(3) + seed_array = random.random(3) iseed1 = int(10000 + 20000*seed_array[0]) iseed2 = int(10000 + 20000*seed_array[1]) iseed3 = int(10000 + 20000*seed_array[2]) @@ -922,7 +922,7 @@ def estHaplotypes(self, iseed3 = iseed3 + i*100 random_start = 1 # need this in testMode too, apparently else: - seed_array = numpy.random.random(3) + seed_array = random.random(3) iseed1 = int(10000 + 20000*seed_array[0]) iseed2 = int(10000 + 20000*seed_array[1]) iseed3 = int(10000 + 20000*seed_array[2]) @@ -966,7 +966,7 @@ def estHaplotypes(self, hap2_code_new # convert u_hap back into original allele names - haplotype = numpy.array(u_hap, dtype='O').reshape(n_u_hap, -1) + haplotype = array(u_hap, dtype='O').reshape(n_u_hap, -1) for j in range(0, n_loci): for i in range(0, n_u_hap): allele_offset = haplotype[i,j] - 1 # integers are offset by 1 @@ -981,8 +981,8 @@ def estHaplotypes(self, # FIXME: are these, strictly speaking, necessary in Python context? # these arrays can be regenerated from the vectors at any time - uhap_df = numpy.c_[u_hap_code, hap_prob] - subj_df = numpy.c_[subj_id, hap1_code, hap2_code] + uhap_df = c_[u_hap_code, hap_prob] + subj_df = c_[subj_id, hap1_code, hap2_code] # XML output for group here self.stream.opentag('group', mode="all-pairwise-ld-no-permu", loci=locusKeys, showHaplo="yes") diff --git a/src/PyPop/Utils.py b/src/PyPop/Utils.py index 89dfb79ba..1c941035b 100644 --- a/src/PyPop/Utils.py +++ b/src/PyPop/Utils.py @@ -40,7 +40,9 @@ """ import os, sys, types, stat, re, shutil, copy, operator -import numpy as np +from numpy import sum as np_sum +from numpy import where as np_where +from numpy import not_equal as np_not_equal from numpy import zeros, take, asarray GENOTYPE_SEPARATOR = "~" GENOTYPE_TERMINATOR= "~" @@ -654,8 +656,8 @@ def countPairs(self): # FIXME: maybe convert to it's own method as per getUniqueAlleles h1 = self.array[:, 0::2] # get "_1" allele (odd cols) h2 = self.array[:, 1::2] # get "_2" allele (even cols) - n_het = np.sum(np.not_equal(h1, h2), 1) # equivalent of: apply(h1!=h2,1,sum) - n_het = np.where(n_het == 0, 1, n_het) # equivalent of: ifelse(n.het==0,1,n.het) + n_het = np_sum(np_not_equal(h1, h2), 1) # equivalent of: apply(h1!=h2,1,sum) + n_het = np_where(n_het == 0, 1, n_het) # equivalent of: ifelse(n.het==0,1,n.het) n_pairs = 2 ** (n_het - 1) # equivalent of: n.pairs = 2^(n.het-1) return n_pairs.tolist() From 47c91c5ee0362f277045622f491036dcdbb08e46 Mon Sep 17 00:00:00 2001 From: Alex Lancaster Date: Mon, 22 Jan 2024 20:19:48 -0500 Subject: [PATCH 4/8] ignore ImportWarnings in numpy (needed for pp38) --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cc2390cf8..ccf1bba6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,9 @@ minversion = "6.0" addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] xfail_strict = true -filterwarnings = ["error"] +# convert warnings to errors, except for ImportWarnings in numpy (needed for pp38) +# FIXME: eventually we should be able to remove the numpy exception +filterwarnings = ["error", "default::ImportWarning:numpy*:"] log_cli_level = "info" testpaths = [ "tests", From 10eb68f1ad35e16e69a78dacf075d2ffd4b03ca6 Mon Sep 17 00:00:00 2001 From: Alex Lancaster Date: Mon, 22 Jan 2024 20:45:32 -0500 Subject: [PATCH 5/8] make even more specific --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ccf1bba6c..50f752e28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] xfail_strict = true # convert warnings to errors, except for ImportWarnings in numpy (needed for pp38) # FIXME: eventually we should be able to remove the numpy exception -filterwarnings = ["error", "default::ImportWarning:numpy*:"] +filterwarnings = ["error", "default::ImportWarning:numpy.random.*:"] log_cli_level = "info" testpaths = [ "tests", From 67f56aed810554e97a5b23bc6bcacf32d7b8a511 Mon Sep 17 00:00:00 2001 From: Alex Lancaster Date: Mon, 22 Jan 2024 22:28:24 -0500 Subject: [PATCH 6/8] don't raise errors on any ImportWarning --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 50f752e28..655de3e5e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] xfail_strict = true # convert warnings to errors, except for ImportWarnings in numpy (needed for pp38) # FIXME: eventually we should be able to remove the numpy exception -filterwarnings = ["error", "default::ImportWarning:numpy.random.*:"] +filterwarnings = ["error", "default::ImportWarning"] log_cli_level = "info" testpaths = [ "tests", From 6f189e7354be3c972ed60bae788749c3117761d9 Mon Sep 17 00:00:00 2001 From: Alex Lancaster Date: Mon, 22 Jan 2024 23:29:18 -0500 Subject: [PATCH 7/8] `NEWS.rst` -> `NEWS.md` --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2c92e89b8..bba8eac0a 100644 --- a/setup.py +++ b/setup.py @@ -231,7 +231,7 @@ def path_to_src(source_path_list): url = "http://www.pypop.org/", project_urls={ 'Documentation': 'http://pypop.org/docs/', - 'Changelog': 'https://github.com/alexlancaster/pypop/blob/main/NEWS.rst', + 'Changelog': 'https://github.com/alexlancaster/pypop/blob/main/NEWS.md', 'Source': 'https://github.com/alexlancaster/pypop/', 'Tracker': 'https://github.com/alexlancaster/pypop/issues', }, From 78ce3efd918f41aae9a82853487891759f9baa2c Mon Sep 17 00:00:00 2001 From: Alex Lancaster Date: Mon, 22 Jan 2024 23:46:07 -0500 Subject: [PATCH 8/8] Revert "only import parts of numpy that are needed" This reverts commit 1675d34ca8c28271b01656e637bf29c1d3e6c83d. --- src/PyPop/Haplo.py | 44 ++++++++++++++++++++++---------------------- src/PyPop/Utils.py | 8 +++----- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/PyPop/Haplo.py b/src/PyPop/Haplo.py index 248af65b0..c949bbf9a 100644 --- a/src/PyPop/Haplo.py +++ b/src/PyPop/Haplo.py @@ -38,9 +38,9 @@ """ import sys, os, re, io +import numpy import math import itertools as it -from numpy import unique, array, minimum, sqrt, random, nan, c_ from tempfile import TemporaryDirectory from PyPop.Arlequin import ArlequinBatch @@ -666,8 +666,8 @@ def _compute_LD(haplos, freqs, compute_ALD=False, debug=False): Make standalone so it can be used by any class """ - unique_alleles1 = unique(haplos[:,0]) - unique_alleles2 = unique(haplos[:,1]) + unique_alleles1 = numpy.unique(haplos[:,0]) + unique_alleles2 = numpy.unique(haplos[:,1]) # FIXME: should merge the two into one loop freq1_dict = {} @@ -704,7 +704,7 @@ def _compute_LD(haplos, freqs, compute_ALD=False, debug=False): allhaplos.append(newrow) # convert to numpy structured array - allhaplos = array(allhaplos, dtype=[('allele1', 'O'), ('allele2', 'O'), ('allele.freq1', float), ('allele.freq2', float), ('haplo.freq', float)]) + allhaplos = numpy.array(allhaplos, dtype=[('allele1', 'O'), ('allele2', 'O'), ('allele.freq1', float), ('allele.freq2', float), ('haplo.freq', float)]) # now we extract the columns we need for the computations hap_prob = allhaplos['haplo.freq'] @@ -717,11 +717,11 @@ def _compute_LD(haplos, freqs, compute_ALD=False, debug=False): num_allpossible_haplos = len(allhaplos) ## compute Wn & Dprime - zero = array([0.0]) + zero = numpy.array([0.0]) dprime_den = zero.repeat(num_allpossible_haplos) d_ij = hap_prob - a_freq1 * a_freq2 - den_lt0 = minimum( a_freq1*a_freq2, (1-a_freq1)*(1-a_freq2) ) - den_ge0 = minimum( (1-a_freq1)*a_freq2, a_freq1*(1-a_freq2) ) + den_lt0 = numpy.minimum( a_freq1*a_freq2, (1-a_freq1)*(1-a_freq2) ) + den_ge0 = numpy.minimum( (1-a_freq1)*a_freq2, a_freq1*(1-a_freq2) ) dprime_den[d_ij < 0] = den_lt0[d_ij < 0] dprime_den[d_ij >=0] = den_ge0[d_ij >=0] dprime_ij = d_ij/dprime_den @@ -741,8 +741,8 @@ def _compute_LD(haplos, freqs, compute_ALD=False, debug=False): w = w_ij.sum() # FIXME: NOT SURE THIS SYNTAX FOR 'min' IS CORRECT (OR GOOD) # WANT: wn <- sqrt( w / (min( length(unique(alleles1)), length(unique(alleles2)) ) - 1.0) ) - w_den = minimum(unique(alleles1).size*1.0, unique(alleles2).size*1.0) - 1.0 - wn = sqrt( w / w_den ) + w_den = numpy.minimum(numpy.unique(alleles1).size*1.0, numpy.unique(alleles2).size*1.0) - 1.0 + wn = numpy.sqrt( w / w_den ) if debug: print ("Wn: ", wn) if compute_ALD: @@ -751,23 +751,23 @@ def _compute_LD(haplos, freqs, compute_ALD=False, debug=False): F_2_1 = 0.0 F_2 = 0.0 F_1_2 = 0.0 - for i in unique(alleles1): - af_1 = unique(a_freq1[alleles1==i])[0] # take the first element of ndarray (default behaviour) + for i in numpy.unique(alleles1): + af_1 = numpy.unique(a_freq1[alleles1==i])[0] # take the first element of ndarray (default behaviour) F_1 = F_1 + af_1**2 F_2_1 = F_2_1 + ((hap_prob[alleles1==i]**2)/af_1).sum() - for i in unique(alleles2): - af_2 = unique(a_freq2[alleles2==i])[0] + for i in numpy.unique(alleles2): + af_2 = numpy.unique(a_freq2[alleles2==i])[0] F_2 = F_2 + af_2**2 F_1_2 = F_1_2 + ((hap_prob[alleles2==i]**2)/af_2).sum() if F_2 == 1.0: - F_2_1_prime = nan - ALD_2_1 = nan + F_2_1_prime = numpy.nan + ALD_2_1 = numpy.nan else: F_2_1_prime = (F_2_1 - F_2)/(1 - F_2) ALD_2_1 = math.sqrt(F_2_1_prime) if F_1 == 1: - F_1_2_prime = nan - ALD_1_2 = nan + F_1_2_prime = numpy.nan + ALD_1_2 = numpy.nan else: F_1_2_prime = (F_1_2 - F_1)/(1 - F_1) ALD_1_2 = math.sqrt(F_1_2_prime) @@ -890,7 +890,7 @@ def estHaplotypes(self, iseed1 = 18717; iseed2= 16090; iseed3=14502 random_start = 0 else: - seed_array = random.random(3) + seed_array = numpy.random.random(3) iseed1 = int(10000 + 20000*seed_array[0]) iseed2 = int(10000 + 20000*seed_array[1]) iseed3 = int(10000 + 20000*seed_array[2]) @@ -922,7 +922,7 @@ def estHaplotypes(self, iseed3 = iseed3 + i*100 random_start = 1 # need this in testMode too, apparently else: - seed_array = random.random(3) + seed_array = numpy.random.random(3) iseed1 = int(10000 + 20000*seed_array[0]) iseed2 = int(10000 + 20000*seed_array[1]) iseed3 = int(10000 + 20000*seed_array[2]) @@ -966,7 +966,7 @@ def estHaplotypes(self, hap2_code_new # convert u_hap back into original allele names - haplotype = array(u_hap, dtype='O').reshape(n_u_hap, -1) + haplotype = numpy.array(u_hap, dtype='O').reshape(n_u_hap, -1) for j in range(0, n_loci): for i in range(0, n_u_hap): allele_offset = haplotype[i,j] - 1 # integers are offset by 1 @@ -981,8 +981,8 @@ def estHaplotypes(self, # FIXME: are these, strictly speaking, necessary in Python context? # these arrays can be regenerated from the vectors at any time - uhap_df = c_[u_hap_code, hap_prob] - subj_df = c_[subj_id, hap1_code, hap2_code] + uhap_df = numpy.c_[u_hap_code, hap_prob] + subj_df = numpy.c_[subj_id, hap1_code, hap2_code] # XML output for group here self.stream.opentag('group', mode="all-pairwise-ld-no-permu", loci=locusKeys, showHaplo="yes") diff --git a/src/PyPop/Utils.py b/src/PyPop/Utils.py index 1c941035b..89dfb79ba 100644 --- a/src/PyPop/Utils.py +++ b/src/PyPop/Utils.py @@ -40,9 +40,7 @@ """ import os, sys, types, stat, re, shutil, copy, operator -from numpy import sum as np_sum -from numpy import where as np_where -from numpy import not_equal as np_not_equal +import numpy as np from numpy import zeros, take, asarray GENOTYPE_SEPARATOR = "~" GENOTYPE_TERMINATOR= "~" @@ -656,8 +654,8 @@ def countPairs(self): # FIXME: maybe convert to it's own method as per getUniqueAlleles h1 = self.array[:, 0::2] # get "_1" allele (odd cols) h2 = self.array[:, 1::2] # get "_2" allele (even cols) - n_het = np_sum(np_not_equal(h1, h2), 1) # equivalent of: apply(h1!=h2,1,sum) - n_het = np_where(n_het == 0, 1, n_het) # equivalent of: ifelse(n.het==0,1,n.het) + n_het = np.sum(np.not_equal(h1, h2), 1) # equivalent of: apply(h1!=h2,1,sum) + n_het = np.where(n_het == 0, 1, n_het) # equivalent of: ifelse(n.het==0,1,n.het) n_pairs = 2 ** (n_het - 1) # equivalent of: n.pairs = 2^(n.het-1) return n_pairs.tolist()