-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path13_ldsc.bsub
86 lines (72 loc) · 2.05 KB
/
13_ldsc.bsub
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/bash
#BSUB -q long
#BSUB -J ldsc_binary_rg[1-15]
#BSUB -n 1
#BSUB -R "span[ptile=1]"
#BSUB -R "rusage[mem=8GB]"
#BSUB -o /your/out/dir/ldsc_binary_rg_%J_%I.out
#BSUB -e /your/err/dir/ldsc_binary_rg_%J_%I.err
# Purpose: LSF array job (indices 1-15, one per target phenotype) that runs
# `ldsc.py --rg` for the phenotype selected by LSB_JOBINDEX against the
# '*ldsc*' files found under /./binary, processed in 10 chunks.
#
# Scheduler directives above: queue "long", job array ldsc_binary_rg[1-15],
# one slot, an 8GB memory reservation (rusage), and per-element stdout/stderr
# files whose names embed %J and %I (LSF substitutes the job ID and the array
# index).
# NOTE(review): the /your/... and /./... paths in this script look like
# placeholders; confirm they are replaced with real locations before submitting.

# Load the Anaconda module (presumably what makes `conda` available below).
module load anaconda3/2021.05
# Source the shell startup file, then activate the conda environment located
# at /./.conda/envs/ldsc (presumably the one providing ldsc.py's Python
# dependencies; verify).
source /./.bashrc
conda activate /./.conda/envs/ldsc
# Find and sort the comparison files, then expose them to the rest of the
# script as ten comma-separated lists: file_paths0 .. file_paths9.

#######################################
# Distribute paths, in order, over a fixed number of comma-joined chunk
# variables. Chunk size is ceil(#paths / #chunks); chunks past the end of the
# list are set to the empty string.
# Globals:   file_paths0 .. file_paths<N-1> (written)
# Arguments: $1   - number of chunks (positive integer)
#            $2.. - paths to distribute
#######################################
split_into_file_path_vars() {
  local -r n_chunks=$1
  shift
  local -a items=("$@")
  local -r per_chunk=$(( (${#items[@]} + n_chunks - 1) / n_chunks ))
  local idx start
  # With IFS set to a comma, "${items[*]...}" joins the selected elements
  # with commas, which is the list format passed to ldsc.py --rg.
  local IFS=,
  for ((idx = 0; idx < n_chunks; idx++)); do
    start=$((idx * per_chunk))
    # printf -v assigns to the dynamically named variable without eval, so
    # paths containing spaces, quotes or `$` are stored verbatim.
    printf -v "file_paths${idx}" '%s' "${items[*]:start:per_chunk}"
  done
}

# Read one path per line into an array instead of round-tripping through a
# comma-joined string, so paths are never word-split or glob-expanded.
ldsc_files=()
while IFS= read -r ldsc_file; do
  ldsc_files+=("$ldsc_file")
done < <(find /./binary -type f -name '*ldsc*' -print | sort)

if ((${#ldsc_files[@]} == 0)); then
  printf 'warning: no *ldsc* files found under /./binary\n' >&2
fi

# Create 10 chunks
split_into_file_path_vars 10 "${ldsc_files[@]}"
# Target phenotypes. Position N in this list (1-based) defines phenoN and
# pathN, and corresponds to element N of the LSF job array.
target_names=(
  Cell_B
  Cell_P
  CLL
  DLBCL
  Drug_G1
  FL
  HL
  LM
  LPL_WM
  MGUS
  MM_MGUS
  MM
  MZL
  Soma_G1
  Soma_G2
)

# Base directory holding one sub-directory of summary statistics per phenotype.
path0="/your/sumstat/dir/"

# Materialise pheno1..pheno15 and path1..path15 from the table above.
for slot in "${!target_names[@]}"; do
  printf -v "pheno$((slot + 1))" '%s' "${target_names[slot]}"
  printf -v "path$((slot + 1))" '%s' "${path0}${target_names[slot]}"
done

# Names (not values) of the variables belonging to this array element; they
# are dereferenced later through indirect expansion.
combined1=path${LSB_JOBINDEX}
combined2=pheno${LSB_JOBINDEX}
# example input:Cell_B_maf0p005_noMHC_munge.sumstats.sumstats.gz
# col.names: SNP A1 A2 Z N
# Run `ldsc.py --rg` for the target phenotype selected by LSB_JOBINDEX against
# each of the ten comparison lists held in file_paths0 .. file_paths9.

# Dereference the selector names once. Either value is empty when
# LSB_JOBINDEX is unset or does not match a defined phenotype.
target_dir=${!combined1}
target_pheno=${!combined2}
if [[ -z "$target_dir" || -z "$target_pheno" ]]; then
  printf 'error: no phenotype defined for LSB_JOBINDEX=%s\n' \
    "${LSB_JOBINDEX:-<unset>}" >&2
  exit 1
fi
target_sumstats="${target_dir}/${target_pheno}_maf0p005_noMHC_munge.sumstats.sumstats.gz"

chunks_run=0
chunks_failed=0
for i in {0..9}; do
  current_file_paths_var="file_paths$i"
  comparison_files=${!current_file_paths_var}
  # Chunks past the end of the discovered file list are empty; running them
  # would pass ldsc.py a trailing comma followed by an empty comparison path.
  if [[ -z "$comparison_files" ]]; then
    continue
  fi
  output_prefix="/your/out/dir/${target_pheno}_binary${i}_ldsc_rg"
  chunks_run=$((chunks_run + 1))
  if ! python /./ldsc/ldsc.py \
    --rg "${target_sumstats},${comparison_files}" \
    --ref-ld-chr /./ldsc/eur_w_ld_chr/ \
    --w-ld-chr /./ldsc/eur_w_ld_chr/ \
    --out "${output_prefix}"; then
    # Keep going so one bad chunk does not block the others, but remember it.
    printf 'error: ldsc.py failed for chunk %s (output prefix %s)\n' \
      "$i" "$output_prefix" >&2
    chunks_failed=$((chunks_failed + 1))
  fi
done

# Report problems to the scheduler through the exit status: previously only
# the status of the final ldsc.py call was visible.
if ((chunks_run == 0)); then
  printf 'error: all comparison chunks are empty; nothing was run\n' >&2
  exit 1
fi
if ((chunks_failed > 0)); then
  exit 1
fi