-
Notifications
You must be signed in to change notification settings - Fork 0
/
calc_pca2_projection.sh
91 lines (85 loc) · 2.3 KB
/
calc_pca2_projection.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/bin/bash
set -e
source constants_.sh
source parse_args.sh "$@"
# Parse input
#
# The variables below are expected to come from the files sourced above
# (constants_.sh / parse_args.sh). Any that is unset or empty receives a
# default here.
#
# The defaults MUST be applied before the dataset paths are built:
# target_dataset embeds ${imp}, so resolving it first would produce
# "<datasets_path><target>//" whenever imp was not supplied.
: "${maf:=0.05}"        # not referenced by the active stages visible in this file
: "${geno:=0.1}"        # not referenced by the active stages visible in this file
: "${imp:=original}"    # sub-directory of the target dataset
: "${memory:=500000}"   # passed to plink/plink2 --memory
: "${threads:=80}"      # passed to plink/plink2 --threads
: "${stage:=4}"         # first stage to run; stage N runs when stage <= N

# Directory prefixes (with trailing slash) used by every stage below.
target_dataset="${datasets_path}${target}/${imp}/"
# Previous derivation of the reference directory, kept for reference:
# ref_dataset="${datasets_path}${target%%_*}/${imp}/"
# ref_dataset arrives as a dataset name and is rewritten into its directory.
ref_dataset="${datasets_path}/${ref_dataset}/"
# Start pipeline
# Stage 1: LD-prune the reference dataset.
# Runs only when the requested starting stage is <= 1.
# Reads  ${ref_dataset}ds.QC.{bed,bim,fam}.
# Writes ${ref_dataset}ds.prune.in / ds.prune.out; the .prune.in list is the
# variant set consumed via --extract by the later stages.
# All expansions are quoted so paths containing spaces or glob characters
# reach plink as single arguments.
if [[ "${stage}" -le 1 ]]; then
  echo '### perform ref prunning ###'
  # --indep-pairwise <window> <step> <r^2 threshold>
  plink \
    --bfile "${ref_dataset}ds.QC" \
    --out "${ref_dataset}ds" \
    --memory "${memory}" \
    --threads "${threads}" \
    --indep-pairwise 200 50 0.25
fi
# Stage 2: compute the reference PCA on the pruned variant set.
# Runs only when the requested starting stage is <= 2.
# Reads  ${ref_dataset}ds.QC.{bed,bim,fam} and ${ref_dataset}ds.prune.in.
# Writes outputs under the prefix ${ref_dataset}ds.ref; the projection stage
# later reads ds.ref.acount (from --freq counts) and ds.ref.eigenvec.allele
# (from --pca ... allele-wts).
# All expansions are quoted so paths containing spaces or glob characters
# reach plink2 as single arguments.
if [[ "${stage}" -le 2 ]]; then
  echo '### calc ref pca'
  plink2 \
    --bfile "${ref_dataset}ds.QC" \
    --out "${ref_dataset}ds.ref" \
    --memory "${memory}" \
    --threads "${threads}" \
    --extract "${ref_dataset}ds.prune.in" \
    --freq counts \
    --make-rel \
    --pca approx allele-wts 6
fi
#if [[ ${stage} -le 2 ]]; then
# echo '### QC ###'
# plink \
# --bfile ${target_dataset}ds \
# --out ${target_dataset}ds.QC \
# --memory ${memory} \
# --threads ${threads} \
# --maf ${maf} \
# --geno ${geno} \
# --hwe 1e-6 \
# --make-bed
#fi
# if [[ ${stage} -le 3 ]]; then
# echo '### perform prunning ###'
# plink \
# --bfile ${target_dataset}ds.QC \
# --out ${target_dataset}ds \
# --memory ${memory} \
# --threads ${threads} \
# --indep-pairwise 200 50 0.25
# fi
# if [[ ${stage} -le 4 ]]; then
# echo "calc target pca"
# plink2 \
# --bfile ${target_dataset}ds.QC \
# --out ${target_dataset}ds.pca \
# --memory ${memory} \
# --threads ${threads} \
# --extract ${ref_dataset}ds.prune.in \
# --geno 0.1 \
# --mind 0.1 \
# --read-freq ${ref_dataset}ds.ref.acount \
# --score ${ref_dataset}ds.ref.eigenvec.allele 2 5 header-read no-mean-imputation variance-standardize \
# --score-col-nums 6-11
# fi
# Stage 4: project the target dataset onto the reference principal components.
# Runs only when the requested starting stage is <= 4 (always, with the
# default stage=4).
# Reads  ${target_dataset}ds.QC.{bed,bim,fam} plus the reference outputs
#        ds.prune.in, ds.ref.acount and ds.ref.eigenvec.allele.
# Writes outputs under the prefix ${target_dataset}ds.pca.
# All expansions are quoted so paths containing spaces or glob characters
# reach plink2 as single arguments.
#
# NOTE(review): the commented-out variant of this stage passes
# `variance-standardize` to --score; this active call omits it. Confirm the
# omission is intentional, since it changes the scaling of the projected scores.
# NOTE(review): `2 5` and `--score-col-nums 6-11` assume variant ID in
# column 2, A1 in column 5 and the six PC weights in columns 6-11 of
# ds.ref.eigenvec.allele - verify against the header written by the plink2
# build in use.
if [[ "${stage}" -le 4 ]]; then
  echo "calc target pca"
  plink2 \
    --bfile "${target_dataset}ds.QC" \
    --out "${target_dataset}ds.pca" \
    --memory "${memory}" \
    --threads "${threads}" \
    --extract "${ref_dataset}ds.prune.in" \
    --geno 0.1 \
    --mind 0.1 \
    --read-freq "${ref_dataset}ds.ref.acount" \
    --score "${ref_dataset}ds.ref.eigenvec.allele" 2 5 header-read no-mean-imputation \
    --score-col-nums 6-11
fi