-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #14 from hongzhonglu/dev
feat: add the general function to conduct the clumps and hotspot anal…
- Loading branch information
Showing
4 changed files
with
118 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#----------------note | ||
#this main script is used to handle with the gene mutation only from SNP information | ||
#in this process, the gene with SNP will be translated into protein, based on which | ||
#the SNP could be classified into nsSNP and sSNP | ||
#Only nsSNP is used to mapping onto protein 3D structure | ||
|
||
strain_classification <- read.table("data/strain_PDETOH_classification.txt", header = TRUE,stringsAsFactors = FALSE) | ||
strain_classification <- strain_classification[, c('strain_name','type')] | ||
strain_type <-"PDETOH_high" | ||
strain_select1 <- chooseStrain(type = "PDETOH_high") | ||
|
||
|
||
|
||
|
||
##batch process for the above whole process | ||
##batch process | ||
# input the gene information | ||
pdb_Ex <- read_excel("data/pdb_Ex refine for final residue distance calculation_manual check.xlsx") | ||
pdb_Ex <- filter(pdb_Ex, is.na(pdb_Ex$With_distance)) | ||
pdb_Ex$pdbid <- paste(pdb_Ex$template, pdb_Ex$chain_new, sep = "@") | ||
pdb_Ex <- select(pdb_Ex, locus, pdbid, qstart2, qend2, sstart2, send2) | ||
geneWithSNP <- getGeneNameWithSNP() | ||
pdb_Ex <- pdb_Ex[which(pdb_Ex$locus %in% geneWithSNP ==TRUE),] | ||
|
||
#add two more clumns | ||
pdb_Ex$strain_type <- strain_type | ||
pdb_Ex$p_value <- NA | ||
|
||
#creat new file to store the results | ||
outfile0 <- paste('result/CLUMPS from pdb_ex for ', strain_type, sep = "") | ||
dir.create(outfile0) | ||
print(outfile0) | ||
|
||
|
||
#main loop | ||
for (i in 1:1047) { | ||
print(i) | ||
i <- 1 | ||
ss0 <- pdb_Ex$locus[i] | ||
mutated_gene0 <- preprocessSNP(ss0,gene_feature = gene_feature0) | ||
mutated_gene1 <- mutated_gene0[which(mutated_gene0$strain %in% strain_select1$Standardized_name), ] | ||
pdbID <- pdb_Ex$pdbid[i] | ||
p1 <- pdb_Ex$sstart2[i] | ||
p2 <- pdb_Ex$send2[i] | ||
distance_dir <- paste("residue_distance/pdb_ex/", pdbID, ".txt", sep = "") | ||
#in some function, gene_snp was not put as the input, should be careful | ||
#gene_snp <- getGeneCoordinate(gene_name = ss0, genesum = gene_feature_GEM) | ||
result0 <- clumpsAnalysis(gene0 = ss0, SNPlist0 = mutated_gene1, gene_annotation0 = gene_feature0, pdb_dir = distance_dir, sstart0 = p1, send0 = p2) | ||
pdb_Ex$p_value[i] <- result0 | ||
} | ||
|
||
# save the result | ||
write.table(pdb_Ex, paste(outfile0,'/','pdb_EX.txt', sep = ""), row.names = FALSE, sep = "\t") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#----------------note | ||
# this main script is used to handle with the gene mutation only from SNP information | ||
# in this process, the gene with SNP will be translated into protein, based on which | ||
# the SNP could be classified into nsSNP and sSNP | ||
# Only nsSNP is used to mapping onto protein 3D structure | ||
|
||
strain_classification <- read.table("data/strain_PDETOH_classification.txt", header = TRUE, stringsAsFactors = FALSE) | ||
strain_classification <- strain_classification[, c('strain_name','type')] | ||
strain_type <-"PDETOH_high" | ||
strain_select1 <- chooseStrain(type = "PDETOH_high") | ||
|
||
#------------batch process-------------------------------------------------------- | ||
# input the gene information | ||
pdb_Ex <- read_excel("data/pdb_Ex refine for final residue distance calculation_manual check.xlsx") | ||
pdb_Ex <- filter(pdb_Ex, is.na(pdb_Ex$With_distance)) | ||
pdb_Ex$pdbid <- paste(pdb_Ex$template, pdb_Ex$chain_new, sep = "@") | ||
pdb_Ex <- select(pdb_Ex, locus, pdbid, qstart2, qend2, sstart2, send2) | ||
geneWithSNP <- getGeneNameWithSNP() | ||
pdb_Ex <- pdb_Ex[which(pdb_Ex$locus %in% geneWithSNP ==TRUE),] | ||
|
||
#creat new file to store the results | ||
outfile0 <- paste('result/hotspot from pdb_ex for ', strain_type, sep = "") | ||
dir.create(outfile0) | ||
print(outfile0) | ||
|
||
# start the batch process | ||
for (i in 1:1047) { | ||
print(i) | ||
ss0 <- pdb_Ex$locus[i] | ||
mutated_gene0 <- preprocessSNP(ss0, gene_feature = gene_feature0) | ||
mutated_gene1 <- mutated_gene0[which(mutated_gene0$strain %in% strain_select1$Standardized_name), ] | ||
pdbID <- pdb_Ex$pdbid[i] | ||
p1 <- pdb_Ex$sstart2[i] | ||
p2 <- pdb_Ex$send2[i] | ||
distance_dir <- paste("residue_distance/pdb_ex/", pdbID, ".txt", sep = "") | ||
# run the function | ||
hotSpotAnalysis( | ||
gene0 = ss0, | ||
SNPlist0 = mutated_gene1, | ||
gene_annotation0 = gene_feature0, | ||
pdb_dir = distance_dir, | ||
sstart0 = p1, | ||
send0 = p2, | ||
result_dir = outfile0 | ||
) | ||
} | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|