Skip to content

Commit

Permalink
test all svd methods for LD module
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Sep 25, 2024
1 parent 2afe950 commit 4167474
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 33 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,4 @@ jobs:
- name: test LD module
run: |
make ld_matrix
make ld_prune
make ld_clump
make ld_tests
52 changes: 32 additions & 20 deletions Makefile
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ SLIBS += ./external/bgen/bgenlib.a ./external/zstd/lib/libzstd.a

LIBS += ${SLIBS} ${DLIBS} -lm -ldl

.PHONY: all clean ld_matrix ld_prune ld_clump
.PHONY: all clean ld_matrix ld_prune ld_clump ld_tests

all: ${program}

Expand Down Expand Up @@ -176,32 +176,44 @@ data:
tar -xzf example.tar.gz && rm -f example.tar.gz

ld_matrix:
./PCAone -b example/plink -k 3 --ld -o adj -m 1
./PCAone -b example/plink -k 3 --ld -o pcaone
./PCAone -b example/plink -k 3 --ld -o adj -d 2 -m 4
./PCAone -b example/plink -k 3 --ld -o pcaone -d 2
diff adj.kept.bim pcaone.kept.bim
cut -f1 adj.kept.bim | sort -cn ## check if sorted
awk '$$1==3' adj.kept.bim | cut -f4 | sort -cn
rm -f pcaone.*

ld_prune:
./PCAone -B adj.residuals --ld-bim adj.kept.bim --ld-r2 0.8 --ld-bp 1000000 -o adj_prune_m0 -m 0
./PCAone -B adj.residuals --ld-bim adj.kept.bim --ld-r2 0.8 --ld-bp 1000000 -o adj_prune_m1 -m 1
diff adj_prune_m0.ld.prune.out adj_prune_m1.ld.prune.out > /dev/null

ld_clump:
./PCAone -B adj.residuals \
--ld-bim adj.kept.bim \
--clump example/plink.pheno0.assoc,example/plink.pheno1.assoc \
--clump-p1 0.01 \
--clump-p2 0.05 \
--clump-r2 0.1 \
--clump-bp 10000000 \
-o adj_clump_m0

./PCAone -B adj.residuals \
--ld-bim adj.kept.bim \
--clump example/plink.pheno0.assoc,example/plink.pheno1.assoc \
--clump-p1 0.01 \
--clump-p2 0.05 \
--clump-r2 0.1 \
--clump-bp 10000000 \
-o adj_clump_m1 -m 1
./PCAone -B adj.residuals --ld-bim adj.kept.bim --clump example/plink.pheno0.assoc --clump-p1 0.01 --clump-p2 0.05 --clump-r2 0.1 --clump-bp 10000000 -m 0 -o adj_clump_m0
./PCAone -B adj.residuals --ld-bim adj.kept.bim --clump example/plink.pheno0.assoc --clump-p1 0.01 --clump-p2 0.05 --clump-r2 0.1 --clump-bp 10000000 -m 1 -o adj_clump_m1

ld_tests:
./PCAone -b example/plink -k 3 --ld -o adj -d 0 -m 4
./PCAone -B adj.residuals --ld-bim adj.kept.bim --ld-r2 0.8 --ld-bp 1000000 -o adj_prune_m0 -m 0
./PCAone -B adj.residuals --ld-bim adj.kept.bim --ld-r2 0.8 --ld-bp 1000000 -o adj_prune_m1 -m 2
diff adj_prune_m0.ld.prune.out adj_prune_m1.ld.prune.out > /dev/null
./PCAone -b example/plink -k 3 --ld -o adj -d 0 --maf 0.1
./PCAone -B adj.residuals --ld-bim adj.kept.bim --ld-r2 0.8 --ld-bp 1000000 -o adj_prune_m0 -m 0
./PCAone -B adj.residuals --ld-bim adj.kept.bim --ld-r2 0.8 --ld-bp 1000000 -o adj_prune_m1 -m 2
diff adj_prune_m0.ld.prune.out adj_prune_m1.ld.prune.out > /dev/null
./PCAone -b example/plink -k 3 --ld -o adj -d 1 -m 4
./PCAone -B adj.residuals --ld-bim adj.kept.bim --ld-r2 0.8 --ld-bp 1000000 -o adj_prune_m0 -m 0
./PCAone -B adj.residuals --ld-bim adj.kept.bim --ld-r2 0.8 --ld-bp 1000000 -o adj_prune_m1 -m 2
diff adj_prune_m0.ld.prune.out adj_prune_m1.ld.prune.out > /dev/null
./PCAone -b example/plink -k 3 --ld -o adj -d 1 --maf 0.1
./PCAone -B adj.residuals --ld-bim adj.kept.bim --ld-r2 0.8 --ld-bp 1000000 -o adj_prune_m0 -m 0
./PCAone -B adj.residuals --ld-bim adj.kept.bim --ld-r2 0.8 --ld-bp 1000000 -o adj_prune_m1 -m 2
diff adj_prune_m0.ld.prune.out adj_prune_m1.ld.prune.out > /dev/null
./PCAone -b example/plink -k 3 --ld -o adj -d 2 -m 4
./PCAone -B adj.residuals --ld-bim adj.kept.bim --ld-r2 0.8 --ld-bp 1000000 -o adj_prune_m0 -m 0
./PCAone -B adj.residuals --ld-bim adj.kept.bim --ld-r2 0.8 --ld-bp 1000000 -o adj_prune_m1 -m 2
diff adj_prune_m0.ld.prune.out adj_prune_m1.ld.prune.out > /dev/null
./PCAone -b example/plink -k 3 --ld -o adj -d 2 --maf 0.1
./PCAone -B adj.residuals --ld-bim adj.kept.bim --ld-r2 0.8 --ld-bp 1000000 -o adj_prune_m0 -m 0
./PCAone -B adj.residuals --ld-bim adj.kept.bim --ld-r2 0.8 --ld-bp 1000000 -o adj_prune_m1 -m 2
diff adj_prune_m0.ld.prune.out adj_prune_m1.ld.prune.out > /dev/null
10 changes: 7 additions & 3 deletions src/Arnoldi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,9 @@ void run_pca_with_arnoldi(Data* data, const Param& params) {
// write to files;
data->write_eigs_files(evals, U, V);
// NOTE: pcangsd only gives us evals of covariance matrix
if (params.ld && !params.pcangsd) {
data->write_residuals(svals, U, V);
}
if (params.ld && !params.pcangsd)
data->write_residuals(svals, U, V.transpose());

} else {
// for blockwise
ArnoldiOpData* op = new ArnoldiOpData(data);
Expand Down Expand Up @@ -200,7 +200,11 @@ void run_pca_with_arnoldi(Data* data, const Param& params) {
flip_UV(op->U, op->VT);
evals.noalias() = eigs->eigenvalues() / data->nsnps;
}

data->write_eigs_files(evals, op->U, op->VT.transpose());
if (params.ld && !params.pcangsd)
data->write_residuals(op->S, op->U, op->VT);

delete op;
delete eigs;
}
Expand Down
5 changes: 4 additions & 1 deletion src/Cmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#include "Cmd.hpp"

#include <iostream>

#include "popl/popl.hpp"

using namespace popl;
Expand Down Expand Up @@ -115,7 +117,7 @@ Param::Param(int argc, char **argv) {
else if(svd_opt->value()==2)
svd_t = SvdType::PCAoneAlg2;
else if(svd_opt->value()==3)
svd_t = SvdType::FULL, out_of_core = false;
svd_t = SvdType::FULL;
else
svd_t = SvdType::PCAoneAlg2;

Expand Down Expand Up @@ -148,6 +150,7 @@ Param::Param(int argc, char **argv) {
keepsnp = maf > 0 ? true : false;
if (ld_r2 > 0 || !clump.empty()) pca = false;
if (svd_t == SvdType::PCAoneAlg2 && !noshuffle) perm = true;
if(svd_t == SvdType::FULL) out_of_core = false;
}
catch(const popl::invalid_option & e)
{
Expand Down
6 changes: 3 additions & 3 deletions src/Data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ void Data::write_eigs_files(const MyVector &S, const MyMatrix &U,
}

void Data::write_residuals(const MyVector &S, const MyMatrix &U,
const MyMatrix &V) {
const MyMatrix &VT) {
// we always filter snps for in-core mode
if (params.ld_stats == 1) {
cao.print(tick.date(),
Expand All @@ -218,7 +218,7 @@ void Data::write_residuals(const MyVector &S, const MyMatrix &U,
uint64 idx;
if (!params.out_of_core) {
if (params.ld_stats == 0)
G -= U * S.asDiagonal() * V.transpose(); // get residuals matrix
G -= U * S.asDiagonal() * VT; // get residuals matrix
G.rowwise() -= G.colwise().mean(); // Centering
for (Eigen::Index ib = 0; ib < G.cols(); ib++) {
fg = G.col(ib).cast<float>();
Expand All @@ -235,7 +235,7 @@ void Data::write_residuals(const MyVector &S, const MyMatrix &U,
read_block_initial(start[b], stop[b], false);
// G (nsamples, actual_block_size)
if (params.ld_stats == 0)
G -= U * S.asDiagonal() * V.transpose().middleCols(start[b], G.cols());
G -= U * S.asDiagonal() * VT.middleCols(start[b], G.cols());
G.rowwise() -= G.colwise().mean(); // Centering
for (Eigen::Index ib = 0; ib <= stop[b] - start[b]; ib++, i++) {
fg = G.col(ib).cast<float>();
Expand Down
3 changes: 2 additions & 1 deletion src/Data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ class Data {
const MyMatrix& VT);
void write_eigs_files(const MyVector& S, const MyMatrix& U,
const MyMatrix& V);
void write_residuals(const MyVector& S, const MyMatrix& U, const MyMatrix& V);
void write_residuals(const MyVector& S, const MyMatrix& U,
const MyMatrix& VT);
// for blockwise
void calcu_vt_initial(const MyMatrix& T, MyMatrix& VT, bool standardize);
void calcu_vt_update(const MyMatrix& T, const MyMatrix& U,
Expand Down
4 changes: 1 addition & 3 deletions src/Halko.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,9 +381,7 @@ void run_pca_with_halko(Data* data, const Param& params) {
else
data->write_eigs_files(rsvd->S.array().square() / data->nsnps, rsvd->U,
rsvd->V);
if (params.ld) {
data->write_residuals(rsvd->S, rsvd->U, rsvd->V);
}
if (params.ld) data->write_residuals(rsvd->S, rsvd->U, rsvd->V.transpose());
}

delete rsvd;
Expand Down

0 comments on commit 4167474

Please sign in to comment.