Skip to content

Commit

Permalink
Add contaminants.fasta
Browse files (browse the repository at this point in the history)
  • Loading branch information
fcyu committed Jan 29, 2018
1 parent 8398aa4 commit 9b48fcb
Show file tree
Hide file tree
Showing 3 changed files with 2,771 additions and 40 deletions.
10 changes: 7 additions & 3 deletions src/main/java/proteomics/Index/BuildIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import proteomics.TheoSeq.MassTool;
import proteomics.Types.*;

import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand All @@ -30,7 +31,7 @@ public class BuildIndex {
private Map<String, Set<String>> seqProMap;
private final float ms1_bin_size;

public BuildIndex(Map<String, String> parameter_map) {
public BuildIndex(Map<String, String> parameter_map) throws IOException {
// initialize parameters
int min_chain_length = Integer.valueOf(parameter_map.get("min_chain_length")) + 2; // n and c are counted in the sequence
int max_chain_length = Integer.valueOf(parameter_map.get("max_chain_length")) + 2; // n and c are counted in the sequence
Expand Down Expand Up @@ -93,8 +94,11 @@ public BuildIndex(Map<String, String> parameter_map) {

// read protein database
DbTool db_tool_obj = new DbTool(db_path, parameter_map.get("database_type"));
Map<String, String> pro_seq_map = db_tool_obj.getProSeqMap();
pro_annotate_map = db_tool_obj.getProAnnotateMap();
DbTool contaminantsDb = new DbTool(null, "contaminants");
Map<String, String> pro_seq_map = contaminantsDb.getProSeqMap();
pro_seq_map.putAll(db_tool_obj.getProSeqMap()); // using the target sequence to replace contaminant sequence if there is conflict.
pro_annotate_map = contaminantsDb.getProAnnotateMap();
pro_annotate_map.putAll(db_tool_obj.getProAnnotateMap()); // if a protein ID appears in both, the target annotation replaces the contaminant annotation.

// define a new MassTool object
mass_tool_obj = new MassTool(missed_cleavage, fix_mod_map, "KR", "P", mz_bin_size, one_minus_bin_offset);
Expand Down
77 changes: 40 additions & 37 deletions src/main/java/proteomics/TheoSeq/DbTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public class DbTool {
private Map<String, String> pro_seq_map = new HashMap<>();
private Map<String, String> pro_annotate_map = new HashMap<>();

public DbTool(String db_name, String databaseType) {
public DbTool(String db_name, String databaseType) throws IOException {
String id = "";
String annotate;
StringBuilder seq = new StringBuilder(99999);
Expand All @@ -26,52 +26,55 @@ public DbTool(String db_name, String databaseType) {
header_pattern = Pattern.compile("^>([^\\s]+)[\\s|]+(.+)$");
} else if (databaseType.contentEquals("UniProt") || databaseType.contentEquals("SwissProt")) {
header_pattern = Pattern.compile("^>[^|]+\\|(.+)\\|(.+)$");
} else if (databaseType.contentEquals("contaminants")) {
header_pattern = Pattern.compile("^>([^ ]+) (.+)$");
} else if (databaseType.contentEquals("Others")) {
header_pattern = Pattern.compile("^>(.+)$");
}
else {
} else {
header_pattern = null;
logger.error("Incorrect database type ({}) in the parameter file.", databaseType);
System.exit(1);
}

try (BufferedReader db_reader = new BufferedReader(new FileReader(db_name))) {
String line;
while ((line = db_reader.readLine()) != null) {
line = line.trim();
Matcher head_matcher = header_pattern.matcher(line);
if (head_matcher.matches()) {
// This line is a header
if (!new_pro) {
// This isn't the first protein
pro_seq_map.put(id, seq.toString());
}
id = head_matcher.group(1).trim();
if (databaseType.contentEquals("Others")) {
annotate = id;
} else {
annotate = head_matcher.group(2).trim();
}
pro_annotate_map.put(id, annotate);
new_pro = true;
} else if (!line.isEmpty()) {
// This line is a body
if (new_pro) {
seq = new StringBuilder(99999);
seq.append(line);
new_pro = false;
} else {
seq.append(line);
}
BufferedReader db_reader;
if (databaseType.contentEquals("contaminants")) {
InputStream inputStream = getClass().getClassLoader().getResourceAsStream("contaminants.fasta");
db_reader = new BufferedReader(new InputStreamReader(inputStream));
} else {
db_reader = new BufferedReader(new FileReader(db_name));
}
String line;
while ((line = db_reader.readLine()) != null) {
line = line.trim();
Matcher head_matcher = header_pattern.matcher(line);
if (head_matcher.matches()) {
// This line is a header
if (!new_pro) {
// This isn't the first protein
pro_seq_map.put(id, seq.toString());
}
id = head_matcher.group(1).trim();
if (databaseType.contentEquals("Others")) {
annotate = id;
} else {
annotate = head_matcher.group(2).trim();
}
pro_annotate_map.put(id, annotate);
new_pro = true;
} else if (!line.isEmpty()) {
// This line is a body
if (new_pro) {
seq = new StringBuilder(99999);
seq.append(line);
new_pro = false;
} else {
seq.append(line);
}
}
// Last protein
pro_seq_map.put(id, seq.toString());
} catch (IOException | PatternSyntaxException ex) {
logger.error(ex.toString());
ex.printStackTrace();
System.exit(1);
}
db_reader.close();
// Last protein
pro_seq_map.put(id, seq.toString());
}

public Map<String, String> getProSeqMap() {
Expand Down
Loading

0 comments on commit 9b48fcb

Please sign in to comment.