-
Notifications
You must be signed in to change notification settings - Fork 1
/
bamchop.aux
131 lines (130 loc) · 13.8 KB
/
bamchop.aux
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
\relax
% Compatibility shim: the records below carry an extra trailing argument
% (e.g. {section.1}) on \contentsline and a five-part body on \newlabel.
% When \hyper@anchor is undefined (presumably: hyperref is not loaded on
% this run -- confirm), both macros are wrapped so the extras are dropped.
\ifx\hyper@anchor\@undefined
% Save the original \contentsline, then accept four arguments and forward
% only the first three to it.
\global \let \oldcontentsline\contentsline
\gdef \contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
% Save the original \newlabel. The replacement hands the label body to
% \newlabelxx, which unpacks five parts and forwards only the first two.
\global \let \oldnewlabel\newlabel
\gdef \newlabel#1#2{\newlabelxx{#1}#2}
\gdef \newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
% Restore both original macros at the end of the document.
\AtEndDocument{\let \contentsline\oldcontentsline
\let \newlabel\oldnewlabel}
\else
% \hyper@anchor is defined: only reset \hyper@last to \relax.
\global \let \hyper@last\relax
\fi
% Make character code 95 (the underscore) an active character.
\catcode 95\active
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{2}{section.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}BAM file}{2}{subsection.1.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Summary statistics}{2}{subsection.1.2}}
\newlabel{SC@1}{{1.2}{2}{Summary statistics\relax }{subsection.1.2}{}}
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces \textbf {Summary statistics} \newline \textbf {Effective size:} chromosome length without assembly gaps. \newline \textbf {Sequencing quality score:} assigned by the resequencing machine to indicate base calling confidence. \newline \textbf {Mapping quality score:} assigned by the alignment program to indicate mapping confidence. \newline \textbf {Mapping location:} strand-specific chromosomal location mapped to by the first base of one or more reads. \newline \textbf {Duplicated mapping:} the first base of multiple reads mapped to the same strand and chromosomal location. }}{2}{table.1}}
% Global definition of \LT@i: eight \LT@entry records, each a {count}{width in pt} pair.
% NOTE(review): presumably the column widths saved by the longtable package for the first long table -- confirm.
\gdef \LT@i {\LT@entry
{1}{49.0094pt}\LT@entry
{1}{69.27904pt}\LT@entry
{1}{50.45406pt}\LT@entry
{1}{43.83435pt}\LT@entry
{1}{58.98427pt}\LT@entry
{1}{53.32933pt}\LT@entry
{1}{58.88455pt}\LT@entry
{1}{65.7739pt}}
\@writefile{toc}{\contentsline {section}{\numberline {2}Read count and sequencing coverage}{3}{section.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Depth categories}{3}{subsection.2.1}}
\newlabel{SC@2}{{2.1}{3}{Depth categories\relax }{subsection.2.1}{}}
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces \textbf {Depth by cutoffs.} Number and percentage of genomic locations (single bases) having the same or higher sequencing depth than given values.}}{3}{table.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Depth by chromosome}{3}{subsection.2.2}}
\@writefile{lot}{\contentsline {table}{\numberline {3}{Sequencing depth by chromosome}}{3}{table.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Depth by genomic feature}{4}{subsection.2.3}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces \textbf {Average depth of genomic features.} Genomic features are regions annotated based on previous knowledge, such as the RefSeq gene track downloaded from UCSC genome browser. Many applications of high-throughput sequencing technologies, such as exome sequencing and RNA-seq, expect higher depth at exons.}}{4}{figure.1}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Sequencing quality}{5}{section.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Quality score categories}{5}{subsection.3.1}}
\newlabel{SC@3}{{3.1}{5}{Quality score categories\relax }{subsection.3.1}{}}
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces \textbf {Score categories.} The number and percentage of base calls having the quality score equal to or higher than given values.}}{5}{table.4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Overall score distribution}{5}{subsection.3.2}}
\newlabel{SC@4}{{3.2}{5}{Overall score distribution\relax }{subsection.3.2}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces \textbf {Score distribution.} This distribution is based on all bases of randomly selected sequencing reads, so position-specific sequencing quality is not considered (see below). The quality scores are calculated by subtracting 33 from the integers corresponding to the ASCII characters in \textbf {<QUAL>}. If the convention of Sanger sequencing was applied to generate the ASCII characters, they are equal to -10*Log10(p value), where p value is the likelihood of incorrect base call.}}{5}{figure.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Position-specific score distribution}{6}{subsection.3.3}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces \textbf {Position-specific sequencing scores.} This plot shows quality scores at different positions within reads. The dashed lines represent the means of quality scores at different positions, whereas the heat gradient corresponds to percentiles.}}{6}{figure.3}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Mapping to reference}{7}{section.4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Mapping length}{7}{subsection.4.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces \textbf {Frequency of mapping lengths.} }}{7}{figure.4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Mapping flag}{7}{subsection.4.2}}
% Global definition of \LT@ii: fourteen \LT@entry records, each a {count}{width in pt} pair.
% NOTE(review): presumably the column widths saved by the longtable package for the second long table -- confirm.
\gdef \LT@ii {\LT@entry
{1}{27.39467pt}\LT@entry
{1}{28.62483pt}\LT@entry
{1}{42.73949pt}\LT@entry
{1}{22.09967pt}\LT@entry
{1}{22.09967pt}\LT@entry
{1}{22.09967pt}\LT@entry
{1}{22.09967pt}\LT@entry
{1}{25.49956pt}\LT@entry
{1}{25.49956pt}\LT@entry
{1}{25.49956pt}\LT@entry
{1}{25.49956pt}\LT@entry
{1}{28.89944pt}\LT@entry
{1}{28.89944pt}\LT@entry
{1}{28.89944pt}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.1}Mapping flag categories}{8}{subsubsection.4.2.1}}
\newlabel{SC@5}{{4.2.1}{8}{Mapping flag categories\relax }{subsubsection.4.2.1}{}}
\@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces \textbf {Mapping flag categories.} The total number and percentage of reads flagged by each category.}}{8}{table.5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.2}Flag value breakdown}{8}{subsubsection.4.2.2}}
\@writefile{lot}{\contentsline {table}{\numberline {6}{The breakdown of values into flag categories.}}{8}{table.6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Mapping score}{8}{subsection.4.3}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.3.1}Mapping score categories}{9}{subsubsection.4.3.1}}
\newlabel{SC@6}{{4.3.1}{9}{Mapping score categories\relax }{subsubsection.4.3.1}{}}
\@writefile{lot}{\contentsline {table}{\numberline {7}{\ignorespaces \textbf {Mapping score categories.} The total number and percentage of reads having mapping scores equal to or higher than given values.}}{9}{table.7}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.3.2}Overall score distribution}{9}{subsubsection.4.3.2}}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces \textbf {Mapping score distribution. } By definition, mapping quality equals -10*Log10(p value), where p value is the likelihood of incorrect mapping; however, its calculation depends on individual programs.}}{9}{figure.5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Mismatch (CIGAR)}{9}{subsection.4.4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.1}Mismatch categories}{10}{subsubsection.4.4.1}}
\newlabel{SC@7}{{4.4.1}{10}{Mismatch categories\relax }{subsubsection.4.4.1}{}}
\@writefile{lot}{\contentsline {table}{\numberline {8}{\ignorespaces \textbf {Mismatch categories.} The total number and percentage of reads having specific types of mismatches.}}{10}{table.8}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.2}Gapped alignment}{10}{subsubsection.4.4.2}}
\newlabel{SC@8}{{4.4.2}{10}{Gapped alignment\relax }{subsubsection.4.4.2}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces \textbf {Distribution of gap size.} If the alignment program tried to align sub-sequences of the same read to remote locations, \textbf {<CIGAR>} will provide the size of the gapped regions.}}{10}{figure.6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.5}Duplicated mapping}{10}{subsection.4.5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.5.1}Duplication level categories}{11}{subsubsection.4.5.1}}
\newlabel{SC@9}{{4.5.1}{11}{Duplication level categories\relax }{subsubsection.4.5.1}{}}
\@writefile{lot}{\contentsline {table}{\numberline {9}{\ignorespaces \textbf {Duplication level categories.} Numbers of mapping locations and reads having the duplication levels of the given values.}}{11}{table.9}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.5.2}Overall duplication distribution}{11}{subsubsection.4.5.2}}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces \textbf {Distribution of duplication levels.} The x-axis indicates the number of reads sharing the same mapping location of their 5'-end and the y-axis is the total occurrence of each level. Only reads mapped to the forward strand and the first 10 million reads of each chromosome were used to reduce computation.}}{11}{figure.7}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.6}Paired reads}{11}{subsection.4.6}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.6.1}Read count summary}{12}{subsubsection.4.6.1}}
\newlabel{SC@10}{{4.6.1}{12}{Read count summary\relax }{subsubsection.4.6.1}{}}
\@writefile{lot}{\contentsline {table}{\numberline {10}{\ignorespaces \textbf {Paired-end reads.} Read counts in this table are based on the "flag" field in BAM file. Properly mapping paired-end reads are reads mapped to the opposite strand of the same chromosome. }}{12}{table.10}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.6.2}Insertion size of paired reads}{12}{subsubsection.4.6.2}}
\newlabel{SC@11}{{4.6.2}{12}{Insertion size of paired reads\relax }{subsubsection.4.6.2}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces \textbf {Distribution of insertion size.} Insertion size is the distance between the mapping locations of the 5'-end of paired reads. It represents the size of DNA fragment to be sequenced.}}{12}{figure.8}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Base frequency}{13}{section.5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Base N frequency}{13}{subsection.5.1}}
\newlabel{SC@12}{{5.1}{13}{Base N frequency\relax }{subsection.5.1}{}}
\@writefile{lot}{\contentsline {table}{\numberline {11}{\ignorespaces \textbf {N base frequency.} The Ns in the reads are assigned by the sequencing machine to suggest that the base cannot be determined due to low quality or other reasons. This table shows the number and percentage of Ns and reads including any Ns. Ns are then excluded from the following analyses of base frequency. }}{13}{table.11}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Expected vs. observed frequency}{13}{subsection.5.2}}
\newlabel{SC@13}{{5.2}{13}{Expected vs. observed frequency\relax }{subsection.5.2}{}}
\@writefile{lot}{\contentsline {table}{\numberline {12}{\ignorespaces \textbf {Expected vs. observed base frequency.} The expected base frequency is based on the whole reference genome and the observed frequency is the base frequency in sequencing reads. Their ratio reflects the sequencing bias of nucleic acid bases.}}{13}{table.12}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.3}GC content}{13}{subsection.5.3}}
\newlabel{SC@14}{{5.3}{13}{GC content\relax }{subsection.5.3}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces \textbf {GC content.} Percentage of C/G bases within each read.}}{13}{figure.9}}
% Global definition of \LT@iii: four \LT@entry records, each a {count}{width in pt} pair.
% NOTE(review): presumably the column widths saved by the longtable package for the third long table -- confirm.
\gdef \LT@iii {\LT@entry
{1}{42.37164pt}\LT@entry
{1}{64.83089pt}\LT@entry
{1}{64.78902pt}\LT@entry
{1}{79.07573pt}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.4}Position-specific base frequency}{14}{subsection.5.4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.4.1}Single base}{14}{subsubsection.5.4.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces \textbf {Single base frequency at both ends.} The base frequency of the first and last 10 bases (the rightmost is the last base) of reads. The frequency was normalized by the overall base frequency within sequencing reads, so this summary indicates the preference of sequencing to start with a given nucleic acid base.}}{14}{figure.10}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.4.2}First two bases}{14}{subsubsection.5.4.2}}
\newlabel{SC@15}{{5.4.2}{14}{First two bases\relax }{subsubsection.5.4.2}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces \textbf {First two base combination.}\small {This plot summarizes the frequency of the two-base combinations at the 5'-end of reads. The size of the blocks represents their relative frequency after being adjusted by their expected frequency based on the position-specific frequency of the first two bases.}}}{14}{figure.11}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.4.3}5-mer frequency}{14}{subsubsection.5.4.3}}
\@writefile{lot}{\contentsline {table}{\numberline {13}{Lowest frequency}}{14}{table.13}}
% Global definition of \LT@iv: four \LT@entry records, each a {count}{width in pt} pair.
% NOTE(review): presumably the column widths saved by the longtable package for the fourth long table -- confirm.
\gdef \LT@iv {\LT@entry
{1}{43.30951pt}\LT@entry
{1}{64.83089pt}\LT@entry
{1}{64.78902pt}\LT@entry
{1}{79.07573pt}}
% Global definition of \LT@v: four \LT@entry records, each a {count}{width in pt} pair.
% The first record carries a count of 3, unlike the otherwise identical \LT@iv.
% NOTE(review): presumably the column widths saved by the longtable package for the fifth long table -- confirm.
\gdef \LT@v {\LT@entry
{3}{43.30951pt}\LT@entry
{1}{64.83089pt}\LT@entry
{1}{64.78902pt}\LT@entry
{1}{79.07573pt}}
\@writefile{lot}{\contentsline {table}{\numberline {14}{Highest frequency}}{15}{table.14}}
\@writefile{lot}{\contentsline {table}{\numberline {15}{Highest relative enrichment}}{15}{table.15}}
\@writefile{toc}{\contentsline {section}{\numberline {6}Alerts}{17}{section.6}}