update ray and scikit-learn, and reduce test runtime

akotlar · Oct 12, 2024 · 758127c · 758127c
1 parent 8e2e638
commit 758127c
Show file tree

Hide file tree

Showing 5 changed files with 12 additions and 12 deletions.
diff --git a/python/Cargo.toml b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "bystro"
-version = "1.0.3"
+version = "1.0.0-beta21"
 edition = "2021"
 build = "build.rs"
 

diff --git a/python/pyproject.toml b/python/pyproject.toml
@@ -10,7 +10,7 @@ classifiers = [
     "Programming Language :: Python :: Implementation :: CPython",
     "Programming Language :: Python :: Implementation :: PyPy",
 ]
-version = "2.0.0-beta18"
+version = "2.0.0-beta21"
 dependencies = [
     "openpyxl==3.1.2",
     "boto3==1.28.9",
@@ -22,10 +22,10 @@ dependencies = [
     "pandas==2.2.2",
     "pyarrow==16.1.0",
     "pystalk==0.7.0",
-    "ray[default]==2.31.0",
+    "ray[default]==2.37.0",
     "ruamel.yaml==0.17.31",
     "scikit-allel==1.3.8",
-    "scikit-learn==1.2.2",
+    "scikit-learn==1.5.2",
     "skops==0.7.post0",
     "tqdm==4.66.3",
     "cloudpickle==3.0.0",

diff --git a/python/python/bystro/domain_adaptation/tests/test_batch_bayesian.py b/python/python/bystro/domain_adaptation/tests/test_batch_bayesian.py
@@ -19,9 +19,9 @@ def simulate_data(n_batches=30, n_samples=15, p=3, seed=1993):
 
 
 def test_bayesian():
-    X_list, controls, true_vals, batch_effects = simulate_data()
+    X_list, controls, true_vals, _batch_effects = simulate_data()
 
-    model = BatchAdaptationBayesian(n_burn=20, n_samples=50)
+    model = BatchAdaptationBayesian(n_burn=2, n_samples=5)
     data_altered = model.fit_transform(X_list, controls)
 
     data_altered_stack = np.vstack(data_altered)

diff --git a/python/python/bystro/parent_of_origin/tests/test_parent_of_origin.py b/python/python/bystro/parent_of_origin/tests/test_parent_of_origin.py
@@ -244,7 +244,7 @@ def test_decision_function():
     beta_m = np.zeros(n_p)
     beta_p = np.zeros(n_p)
     beta_p[:3] = 0.5
-    data = generate_data(beta_m, beta_p, rng, maf=0.1, n_individuals=20000)
+    data = generate_data(beta_m, beta_p, rng, maf=0.1, n_individuals=12000)
     model = POESingleSNP(
         compute_pvalue=True, cov_regularization="QuadraticInverse"
     )
@@ -270,7 +270,7 @@ def test_multi_fit():
     beta_p = np.zeros(n_p)
     beta_p[:3] = 0.5
     data = generate_multivariate_data(
-        beta_m, beta_p, rng, maf=0.03, n_individuals=500000, n_genotypes=1000
+        beta_m, beta_p, rng, maf=0.03, n_individuals=500, n_genotypes=100
     )
     model = POEMultipleSNP()
     model.fit(data["phenotypes"], data["genotypes"])
@@ -284,9 +284,9 @@ def test_multi2_fit():
     beta_p = np.zeros(n_p)
     beta_p[:3] = 0.5
     data = generate_multivariate_data(
-        beta_m, beta_p, rng, maf=0.03, n_individuals=50000, n_genotypes=1000
+        beta_m, beta_p, rng, maf=0.03, n_individuals=1000, n_genotypes=100
     )
-    model = POEMultipleSNP2(n_repeats=10)
+    model = POEMultipleSNP2(n_repeats=3)
     model.fit(data["phenotypes"], data["genotypes"], seed=2021)
     assert model is not None, "Model fitting failed"
     assert isinstance(model, POEMultipleSNP2), "Model type is incorrect"
diff --git a/python/python/bystro/prs/tests/test_prscs_sgld.py b/python/python/bystro/prs/tests/test_prscs_sgld.py
@@ -23,8 +23,8 @@ def test_prscs():
     mm = Ridge()
     mm.fit(X, y)
     beta = np.squeeze(mm.coef_)
-    model = PRSCS(training_options={"n_samples": 40000, "batch_size": 500})
+    model = PRSCS(training_options={"n_samples": 5000, "batch_size": 100})
     model.fit(X, y,progress_bar=False)
 
-    posterior_mean = np.mean(model.samples_beta[20000:], axis=0)
+    posterior_mean = np.mean(model.samples_beta[2500:], axis=0)
     assert np.mean((posterior_mean - beta) ** 2) < 0.1