Skip to content

Commit

Permalink
update ray and scikit-learn, and reduce test runtime
Browse files (browse the repository at this point in the history)
  • Loading branch information
akotlar committed Oct 12, 2024
1 parent 8e2e638 commit 758127c
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 12 deletions.
2 changes: 1 addition & 1 deletion python/Cargo.toml
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
[package]
name = "bystro"
version = "1.0.3"
version = "1.0.0-beta21"
edition = "2021"
build = "build.rs"

Expand Down
6 changes: 3 additions & 3 deletions python/pyproject.toml
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,7 +10,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
version = "2.0.0-beta18"
version = "2.0.0-beta21"
dependencies = [
"openpyxl==3.1.2",
"boto3==1.28.9",
Expand All @@ -22,10 +22,10 @@ dependencies = [
"pandas==2.2.2",
"pyarrow==16.1.0",
"pystalk==0.7.0",
"ray[default]==2.31.0",
"ray[default]==2.37.0",
"ruamel.yaml==0.17.31",
"scikit-allel==1.3.8",
"scikit-learn==1.2.2",
"scikit-learn==1.5.2",
"skops==0.7.post0",
"tqdm==4.66.3",
"cloudpickle==3.0.0",
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,9 +19,9 @@ def simulate_data(n_batches=30, n_samples=15, p=3, seed=1993):


def test_bayesian():
X_list, controls, true_vals, batch_effects = simulate_data()
X_list, controls, true_vals, _batch_effects = simulate_data()

model = BatchAdaptationBayesian(n_burn=20, n_samples=50)
model = BatchAdaptationBayesian(n_burn=2, n_samples=5)
data_altered = model.fit_transform(X_list, controls)

data_altered_stack = np.vstack(data_altered)
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -244,7 +244,7 @@ def test_decision_function():
beta_m = np.zeros(n_p)
beta_p = np.zeros(n_p)
beta_p[:3] = 0.5
data = generate_data(beta_m, beta_p, rng, maf=0.1, n_individuals=20000)
data = generate_data(beta_m, beta_p, rng, maf=0.1, n_individuals=12000)
model = POESingleSNP(
compute_pvalue=True, cov_regularization="QuadraticInverse"
)
Expand All @@ -270,7 +270,7 @@ def test_multi_fit():
beta_p = np.zeros(n_p)
beta_p[:3] = 0.5
data = generate_multivariate_data(
beta_m, beta_p, rng, maf=0.03, n_individuals=500000, n_genotypes=1000
beta_m, beta_p, rng, maf=0.03, n_individuals=500, n_genotypes=100
)
model = POEMultipleSNP()
model.fit(data["phenotypes"], data["genotypes"])
Expand All @@ -284,9 +284,9 @@ def test_multi2_fit():
beta_p = np.zeros(n_p)
beta_p[:3] = 0.5
data = generate_multivariate_data(
beta_m, beta_p, rng, maf=0.03, n_individuals=50000, n_genotypes=1000
beta_m, beta_p, rng, maf=0.03, n_individuals=1000, n_genotypes=100
)
model = POEMultipleSNP2(n_repeats=10)
model = POEMultipleSNP2(n_repeats=3)
model.fit(data["phenotypes"], data["genotypes"], seed=2021)
assert model is not None, "Model fitting failed"
assert isinstance(model, POEMultipleSNP2), "Model type is incorrect"
4 changes: 2 additions & 2 deletions python/python/bystro/prs/tests/test_prscs_sgld.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,8 +23,8 @@ def test_prscs():
mm = Ridge()
mm.fit(X, y)
beta = np.squeeze(mm.coef_)
model = PRSCS(training_options={"n_samples": 40000, "batch_size": 500})
model = PRSCS(training_options={"n_samples": 5000, "batch_size": 100})
model.fit(X, y,progress_bar=False)

posterior_mean = np.mean(model.samples_beta[20000:], axis=0)
posterior_mean = np.mean(model.samples_beta[2500:], axis=0)
assert np.mean((posterior_mean - beta) ** 2) < 0.1

0 comments on commit 758127c

Please sign in to comment.