diff --git a/docs/source/literature.bib b/docs/source/literature.bib index bc524d3..4a2b9c6 100644 --- a/docs/source/literature.bib +++ b/docs/source/literature.bib @@ -3,7 +3,8 @@ @misc{nutpie keywords = {Software}, license = {MIT}, title = {{nutpie}}, - url = {https://github.com/pymc-devs/nutpie} + url = {https://github.com/pymc-devs/nutpie}, + year = {2022}, } @article{scipy, @@ -46,7 +47,7 @@ @article{matplotlib @misc{matplotlibzenodo, author = {{The Matplotlib Development Team}}, - title = {Matplotlib: Visualization with Python}, + title = {{Matplotlib: Visualization with Python}}, keywords = {software}, month = may, year = 2024, @@ -58,17 +59,16 @@ @misc{matplotlibzenodo @article{RN173, author = {Hoffmann, Matthew D. and Gelman, Andrew}, - title = {The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo}, + title = {{The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo}}, journal = {Journal of Machine Learning Research}, volume = {15}, year = {2014}, - type = {Journal Article} } @article{RN150, author = {Abril-Pla, O. and Andreani, V. and Carroll, C. and Dong, L. and Fonnesbeck, C. J. and Kochurov, M. and Kumar, R. and Lao, J. and Luhmann, C. C. and Martin, O. A. and Osthege, M. and Vieira, R. and Wiecki, T. 
and Zinkov, R.}, - title = {{PyMC}: a modern, and comprehensive probabilistic programming framework in Python}, - journal = {PeerJ Comput Sci}, + title = {{PyMC}: a modern, and comprehensive probabilistic programming framework in {P}ython}, + journal = {PeerJ Computer Science}, volume = {9}, pages = {e1516}, issn = {2376-5992 (Electronic) @@ -76,47 +76,44 @@ @article{RN150 doi = {10.7717/peerj-cs.1516}, url = {https://www.ncbi.nlm.nih.gov/pubmed/37705656}, year = {2023}, - type = {Journal Article} } @book{RN162, author = {Kruschke, John K.}, - title = {Doing Bayesian Data Analysis}, + title = {{Doing Bayesian Data Analysis}}, edition = {Second Edition}, publisher={Academic Press}, isbn = {9780123814852}, year = {2015}, type = {Book}, - doi = {http://dx.doi.org/10.1016/B978-0-12-405888-0.00001-5} + doi = {10.1016/B978-0-12-405888-0.00001-5} } @article{RN144, author = {Azzalini, A.}, title = {A class of distributions which includes the normal ones}, - journal = {Scand. J. Statist.}, + journal = {Scandinavian Journal of Statistics}, volume = {12}, pages = {171-178}, year = {1985}, - type = {Journal Article}, url = {http://www.jstor.org/stable/4615982}, } @article{RN152, author = {Gelman, Andrew and Rubin, Donald B.}, - title = {Inference from Iterative Simulation Using Multiple Sequences}, + title = {{Inference from Iterative Simulation Using Multiple Sequences}}, journal = {Statistical Science}, volume = {7}, number = {4}, year = {1992}, - type = {Journal Article}, doi = {10.1214/ss/1177011136} } @article{RN153, author = {Grushka, E.}, - title = {Characterization of exponentially modified Gaussian peaks in chromatography}, - journal = {Anal Chem}, + title = {{Characterization of exponentially modified Gaussian peaks in chromatography}}, + journal = {Analytical Chemistry}, volume = {44}, number = {11}, pages = {1733-8}, @@ -125,13 +122,12 @@ @article{RN153 doi = {10.1021/ac60319a011}, url = {https://www.ncbi.nlm.nih.gov/pubmed/22324584}, year = {1972}, - type = 
{Journal Article} } @article{RN149, author = {Hemmerich, J. and Noack, S. and Wiechert, W. and Oldiges, M.}, - title = {Microbioreactor Systems for Accelerated Bioprocess Development}, - journal = {Biotechnol J}, + title = {{Microbioreactor Systems for Accelerated Bioprocess Development}}, + journal = {Biotechnology Journal}, volume = {13}, number = {4}, pages = {e1700141}, @@ -140,13 +136,12 @@ @article{RN149 doi = {10.1002/biot.201700141}, url = {https://www.ncbi.nlm.nih.gov/pubmed/29283217}, year = {2018}, - type = {Journal Article} } @article{RN148, author = {Kostov, Y. and Harms, P. and Randers-Eichhorn, L. and Rao, G.}, title = {Low-cost microbioreactor for high-throughput bioprocessing}, - journal = {Biotechnol Bioeng}, + journal = {Biotechnology and Bioengineering}, volume = {72}, number = {3}, pages = {346-52}, @@ -155,12 +150,11 @@ @article{RN148 doi = {10.1002/1097-0290(20010205)72:3<346::aid-bit12>3.0.co;2-x}, url = {https://www.ncbi.nlm.nih.gov/pubmed/11135205}, year = {2001}, - type = {Journal Article} } @article{RN145, author = {Vehtari, Aki and Gelman, Andrew and Gabry, Jonah}, - title = {Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC}, + title = {{Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC}}, journal = {Statistics and Computing}, volume = {27}, number = {5}, @@ -169,29 +163,26 @@ @article{RN145 1573-1375}, doi = {10.1007/s11222-016-9696-4}, year = {2016}, - type = {Journal Article} } @article{RN146, author = {Watanabe, Sumio}, - title = {Asymptotic Equivalence of Bayes Cross Validation and Widely Applicable Information Criterion in Singular Learning Theory}, - journal = {Journal of machine learning research}, + title = {{Asymptotic Equivalence of Bayes Cross Validation and Widely Applicable Information Criterion in Singular Learning Theory}}, + journal = {Journal of Machine Learning Research}, volume = {11}, pages = {3571-3594}, year = {2010}, - type = {Journal Article}, } 
@article{RN147, author = {Kumar, Ravin and Carroll, Colin and Hartikainen, Ari and Martin, Osvaldo}, - title = {ArviZ a unified library for exploratory analysis of Bayesian models in Python}, + title = {{ArviZ a unified library for exploratory analysis of Bayesian models in Python}}, journal = {Journal of Open Source Software}, volume = {4}, number = {33}, issn = {2475-9066}, doi = {10.21105/joss.01143}, year = {2019}, - type = {Journal Article} } @article{harris2020array, @@ -217,60 +208,60 @@ @article{harris2020array } @article{vivo2012bayesian, - title={Bayesian approach for peak detection in two-dimensional chromatography}, + title={{Bayesian approach for peak detection in two-dimensional chromatography}}, author={Viv{\'o}-Truyols, Gabriel}, - journal={Analytical chemistry}, + journal={Analytical Chemistry}, volume={84}, number={6}, pages={2622--2630}, year={2012}, - doi={https://doi.org/10.1021/ac202124t}, + doi={10.1021/ac202124t}, publisher={ACS Publications} } @article{woldegebriel2015probabilistic, - title={Probabilistic model for untargeted peak detection in LC--MS using Bayesian statistics}, + title={{Probabilistic model for untargeted peak detection in LC--MS using Bayesian statistics}}, author={Woldegebriel, Michael and Viv{\'o}-Truyols, Gabriel}, - journal={Analytical chemistry}, + journal={Analytical Chemistry}, volume={87}, number={14}, pages={7345--7355}, year={2015}, - doi={https://doi.org/10.1021/acs.analchem.5b01521}, + doi={10.1021/acs.analchem.5b01521}, publisher={ACS Publications} } @article{briskot2019prediction, - title={Prediction uncertainty assessment of chromatography models using Bayesian inference}, + title={{Prediction uncertainty assessment of chromatography models using Bayesian inference}}, author={Briskot, Till and St\"{u}ckler, Ferdinand and Wittkopp, Felix and Williams, Christopher and Yang, Jessica and Konrad, Susanne and Doninger, Katharina and Griesbach, Jan and Bennecke, Moritz and Hepbildikler, Stefan and others}, 
journal={Journal of Chromatography A}, volume={1587}, pages={101--110}, year={2019}, - doi={https://doi.org/10.1016/j.chroma.2018.11.076}, + doi={10.1016/j.chroma.2018.11.076}, publisher={Elsevier} } @article{yamamoto2021uncertainty, - title={Uncertainty quantification for chromatography model parameters by Bayesian inference using sequential Monte Carlo method}, + title={{Uncertainty quantification for chromatography model parameters by Bayesian inference using sequential Monte Carlo method}}, author={Yamamoto, Yota and Yajima, Tomoyuki and Kawajiri, Yoshiaki}, journal={Chemical Engineering Research and Design}, volume={175}, pages={223--237}, year={2021}, - doi={https://doi.org/10.1016/j.cherd.2021.09.003}, + doi={10.1016/j.cherd.2021.09.003}, publisher={Elsevier} } @article{wiczling2016much, - title={How much can we learn from a single chromatographic experiment? A Bayesian perspective}, + title={{How much can we learn from a single chromatographic experiment? A Bayesian perspective}}, author={Wiczling, Pawe{\l} and Kaliszan, Roman}, - journal={Analytical chemistry}, + journal={Analytical Chemistry}, volume={88}, number={1}, pages={997--1002}, year={2016}, - doi={https://doi.org/10.1021/acs.analchem.5b03859}, + doi={10.1021/acs.analchem.5b03859}, publisher={ACS Publications} } @@ -282,7 +273,7 @@ @article{kelly1971estimation number={10}, pages={1170--1183}, year={1971}, - doi={https://doi.org/10.1021/ac60304a011}, + doi={10.1021/ac60304a011}, publisher={ACS Publications} } @@ -294,6 +285,6 @@ @article{kelly1971application number={10}, pages={1184--1195}, year={1971}, - doi={https://doi.org/10.1021/ac60304a005}, + doi={10.1021/ac60304a005}, publisher={ACS Publications} } diff --git a/paper/literature.bib b/paper/literature.bib index 051ede5..4a2b9c6 100644 --- a/paper/literature.bib +++ b/paper/literature.bib @@ -3,7 +3,8 @@ @misc{nutpie keywords = {Software}, license = {MIT}, title = {{nutpie}}, - url = {https://github.com/pymc-devs/nutpie} + url = 
{https://github.com/pymc-devs/nutpie}, + year = {2022}, } @article{scipy, @@ -67,7 +68,7 @@ @article{RN173 @article{RN150, author = {Abril-Pla, O. and Andreani, V. and Carroll, C. and Dong, L. and Fonnesbeck, C. J. and Kochurov, M. and Kumar, R. and Lao, J. and Luhmann, C. C. and Martin, O. A. and Osthege, M. and Vieira, R. and Wiecki, T. and Zinkov, R.}, title = {{PyMC}: a modern, and comprehensive probabilistic programming framework in {P}ython}, - journal = {PeerJ Comput Sci}, + journal = {PeerJ Computer Science}, volume = {9}, pages = {e1516}, issn = {2376-5992 (Electronic) @@ -91,7 +92,7 @@ @book{RN162 @article{RN144, author = {Azzalini, A.}, title = {A class of distributions which includes the normal ones}, - journal = {Scand. J. Statist.}, + journal = {Scandinavian Journal of Statistics}, volume = {12}, pages = {171-178}, year = {1985}, @@ -112,7 +113,7 @@ @article{RN152 @article{RN153, author = {Grushka, E.}, title = {{Characterization of exponentially modified Gaussian peaks in chromatography}}, - journal = {Anal Chem}, + journal = {Analytical Chemistry}, volume = {44}, number = {11}, pages = {1733-8}, @@ -126,7 +127,7 @@ @article{RN153 @article{RN149, author = {Hemmerich, J. and Noack, S. and Wiechert, W. and Oldiges, M.}, title = {{Microbioreactor Systems for Accelerated Bioprocess Development}}, - journal = {Biotechnol J}, + journal = {Biotechnology Journal}, volume = {13}, number = {4}, pages = {e1700141}, @@ -140,7 +141,7 @@ @article{RN149 @article{RN148, author = {Kostov, Y. and Harms, P. and Randers-Eichhorn, L. 
and Rao, G.}, title = {Low-cost microbioreactor for high-throughput bioprocessing}, - journal = {Biotechnol Bioeng}, + journal = {Biotechnology and Bioengineering}, volume = {72}, number = {3}, pages = {346-52}, @@ -167,7 +168,7 @@ @article{RN145 @article{RN146, author = {Watanabe, Sumio}, title = {{Asymptotic Equivalence of Bayes Cross Validation and Widely Applicable Information Criterion in Singular Learning Theory}}, - journal = {Journal of machine learning research}, + journal = {Journal of Machine Learning Research}, volume = {11}, pages = {3571-3594}, year = {2010}, @@ -209,7 +210,7 @@ @article{harris2020array @article{vivo2012bayesian, title={{Bayesian approach for peak detection in two-dimensional chromatography}}, author={Viv{\'o}-Truyols, Gabriel}, - journal={Analytical chemistry}, + journal={Analytical Chemistry}, volume={84}, number={6}, pages={2622--2630}, @@ -221,7 +222,7 @@ @article{vivo2012bayesian @article{woldegebriel2015probabilistic, title={{Probabilistic model for untargeted peak detection in LC--MS using Bayesian statistics}}, author={Woldegebriel, Michael and Viv{\'o}-Truyols, Gabriel}, - journal={Analytical chemistry}, + journal={Analytical Chemistry}, volume={87}, number={14}, pages={7345--7355}, @@ -255,7 +256,7 @@ @article{yamamoto2021uncertainty @article{wiczling2016much, title={{How much can we learn from a single chromatographic experiment? A Bayesian perspective}}, author={Wiczling, Pawe{\l} and Kaliszan, Roman}, - journal={Analytical chemistry}, + journal={Analytical Chemistry}, volume={88}, number={1}, pages={997--1002}, diff --git a/paper/paper.md b/paper/paper.md index d9c41d4..8bc2b79 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -42,10 +42,10 @@ bibliography: literature.bib A major bottleneck of chromatography-based analytics has been the elusive fully automated identification and integration of peak data without the need of extensive human supervision. 
The presented Python package $\texttt{PeakPerformance}$ applies Bayesian inference to chromatographic peak fitting, and provides an automated approach featuring model selection and uncertainty quantification. -Regarding peak acceptance, it improves on vendor software solutions with more sophisticated, multi-layered metrics for decision making based on convergence of the parameter estimation, as well as the uncertainties of peak parameters. +Regarding peak acceptance, it improves on vendor software solutions with more sophisticated, multi-layered metrics for decision making based on convergence of the parameter estimation as well as the uncertainties of peak parameters. Currently, its application is focused on data from targeted liquid chromatography tandem mass spectrometry (LC-MS/MS), but its design allows for an expansion to other chromatographic techniques and accommodates users with little programming experience by supplying convenience functions and relying on Microsoft Excel for data input and reporting. -$\texttt{PeakPerformance}$ is implemented in Python, its source code is available on [GitHub](https://github.com/JuBiotech/peak-performance), and a through documentation is available under [https://peak-performance.rtfd.io](https://peak-performance.rtfd.io). -It is unit-tested on Linux and Windows and accompanied by documentation as well as example notebooks. +$\texttt{PeakPerformance}$ is implemented in Python, its source code is available on [GitHub](https://github.com/JuBiotech/peak-performance), and thorough documentation is available at [https://peak-performance.rtfd.io](https://peak-performance.rtfd.io). +It is unit-tested on Linux and Windows and accompanied by example notebooks. # Statement of need @@ -90,11 +90,11 @@ __Figure 1:__ Overview of the pre-manufactured data analysis pipeline featured i Subsequently, the peak analysis pipeline can be started with the function $\texttt{pipeline()}$ from the $\texttt{pipeline}$ module. 
Depending on whether the "pre-filtering" option was selected, an optional filtering step will be executed to reject signals where clearly no peak is present before sampling, thus saving computation time. -Upon passing the first filter, a Markov chain Monte Carlo (MCMC) simulation is conducted using a No-U-Turn Sampler (NUTS) [@RN173], preferably - if installed in the Python environment - the nutpie sampler [@nutpie] due to its highly increased performance compared to the default sampler of PyMC. -When a posterior distribution has been obtained, the main filtering step is next in line which checks the convergence of the Markov chains via the potential scale reduction factor [@RN152] or $\hat{R}$ statistic and based on the uncertainty of the determined peak parameters. +Upon passing the first filter, a Markov chain Monte Carlo (MCMC) simulation is conducted using a No-U-Turn Sampler (NUTS) [@RN173], preferably - if installed in the Python environment - the $\texttt{nutpie}$ sampler [@nutpie] due to its highly increased performance compared to the default sampler of PyMC. +When a posterior distribution has been obtained, the main filtering step follows, which checks the convergence of the Markov chains via the potential scale reduction factor [@RN152] or $\hat{R}$ statistic and evaluates the uncertainty of the determined peak parameters. If a signal was accepted as a peak, a posterior predictive check is conducted and added to the inference data object resulting from the model simulation. -Regarding the performance of the simulation, in our tests an analysis of a single peaks took 20 s to 30 s and of a double peaks 25 s to 90 s. -This is of course dependent on the power of the computer as well as whether an additional simulation with an increased number of samples needs to be conducted. +Regarding the performance of the simulation, in our tests an analysis of a single peak took 20 s to 30 s and of a double peak 25 s to 90 s. 
+This is, of course, dependent on the type of sampler and the power of the computer, as well as on whether an additional simulation with an increased number of samples needs to be conducted. ## Peak fitting results and diagnostic plots @@ -108,7 +108,7 @@ The posterior plot presents the fit of the intensity function alongside the raw The first row of Figure 2 presents two such examples where the single peak diagram shows the histidine (His) fragment with a m/z ratio of 110 Da and the double peak diagram the leucine (Leu) and isoleucine (Ile) fragments with a m/z ratio of 86 Da. ![](./Fig4_peak_results.png) -__Figure 2:__ Results plots for a single His peak and a double Leu and Ile peak depicting the peak fit (first row) and the posterior predictive checks (second row) alongside the raw data. The numerical results are listed in table 2. +__Figure 2:__ Results plots for a single His peak and a double Leu and Ile peak depicting the peak fit (first row) and the posterior predictive checks (second row) alongside the raw data. The numerical results are listed in Table 2. The posterior predictive plots in the second row of Figure 4 are provided for visual posterior predictive checks, namely the comparison of observed and predicted data distribution. Since a posterior predictive check is based on drawing samples from the likelihood function, the result represents the theoretical range of values encompassed by the model.