Final corrections prior to publication in JOSS #45

Merged (3 commits, Dec 2, 2024)
77 changes: 34 additions & 43 deletions docs/source/literature.bib
@@ -3,7 +3,8 @@ @misc{nutpie
keywords = {Software},
license = {MIT},
title = {{nutpie}},
url = {https://github.com/pymc-devs/nutpie}
url = {https://github.com/pymc-devs/nutpie},
year = {2022},
}

@article{scipy,
@@ -46,7 +47,7 @@ @article{matplotlib

@misc{matplotlibzenodo,
author = {{The Matplotlib Development Team}},
title = {Matplotlib: Visualization with Python},
title = {{Matplotlib: Visualization with Python}},
keywords = {software},
month = may,
year = 2024,
@@ -58,65 +59,61 @@ @misc{matplotlibzenodo

@article{RN173,
author = {Hoffmann, Matthew D. and Gelman, Andrew},
title = {The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo},
title = {{The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo}},
journal = {Journal of Machine Learning Research},
volume = {15},
year = {2014},
type = {Journal Article}
}

@article{RN150,
author = {Abril-Pla, O. and Andreani, V. and Carroll, C. and Dong, L. and Fonnesbeck, C. J. and Kochurov, M. and Kumar, R. and Lao, J. and Luhmann, C. C. and Martin, O. A. and Osthege, M. and Vieira, R. and Wiecki, T. and Zinkov, R.},
title = {{PyMC}: a modern, and comprehensive probabilistic programming framework in Python},
journal = {PeerJ Comput Sci},
title = {{PyMC}: a modern, and comprehensive probabilistic programming framework in {P}ython},
journal = {PeerJ Computer Science},
volume = {9},
pages = {e1516},
issn = {2376-5992 (Electronic)
2376-5992 (Linking)},
doi = {10.7717/peerj-cs.1516},
url = {https://www.ncbi.nlm.nih.gov/pubmed/37705656},
year = {2023},
type = {Journal Article}
}

@book{RN162,
author = {Kruschke, John K.},
title = {Doing Bayesian Data Analysis},
title = {{Doing Bayesian Data Analysis}},
edition = {Second Edition},
publisher={Academic Press},
isbn = {9780123814852},
year = {2015},
type = {Book},
doi = {http://dx.doi.org/10.1016/B978-0-12-405888-0.00001-5}
doi = {10.1016/B978-0-12-405888-0.00001-5}
}

@article{RN144,
author = {Azzalini, A.},
title = {A class of distributions which includes the normal ones},
journal = {Scand. J. Statist.},
journal = {Scandinavian Journal of Statistics},
volume = {12},
pages = {171-178},
year = {1985},
type = {Journal Article},
url = {http://www.jstor.org/stable/4615982},
}


@article{RN152,
author = {Gelman, Andrew and Rubin, Donald B.},
title = {Inference from Iterative Simulation Using Multiple Sequences},
title = {{Inference from Iterative Simulation Using Multiple Sequences}},
journal = {Statistical Science},
volume = {7},
number = {4},
year = {1992},
type = {Journal Article},
doi = {10.1214/ss/1177011136}
}

@article{RN153,
author = {Grushka, E.},
title = {Characterization of exponentially modified Gaussian peaks in chromatography},
journal = {Anal Chem},
title = {{Characterization of exponentially modified Gaussian peaks in chromatography}},
journal = {Analytical Chemistry},
volume = {44},
number = {11},
pages = {1733-8},
@@ -125,13 +122,12 @@ @article{RN153
doi = {10.1021/ac60319a011},
url = {https://www.ncbi.nlm.nih.gov/pubmed/22324584},
year = {1972},
type = {Journal Article}
}

@article{RN149,
author = {Hemmerich, J. and Noack, S. and Wiechert, W. and Oldiges, M.},
title = {Microbioreactor Systems for Accelerated Bioprocess Development},
journal = {Biotechnol J},
title = {{Microbioreactor Systems for Accelerated Bioprocess Development}},
journal = {Biotechnology Journal},
volume = {13},
number = {4},
pages = {e1700141},
@@ -140,13 +136,12 @@ @article{RN149
doi = {10.1002/biot.201700141},
url = {https://www.ncbi.nlm.nih.gov/pubmed/29283217},
year = {2018},
type = {Journal Article}
}

@article{RN148,
author = {Kostov, Y. and Harms, P. and Randers-Eichhorn, L. and Rao, G.},
title = {Low-cost microbioreactor for high-throughput bioprocessing},
journal = {Biotechnol Bioeng},
journal = {Biotechnology and Bioengineering},
volume = {72},
number = {3},
pages = {346-52},
@@ -155,12 +150,11 @@ @article{RN148
doi = {10.1002/1097-0290(20010205)72:3<346::aid-bit12>3.0.co;2-x},
url = {https://www.ncbi.nlm.nih.gov/pubmed/11135205},
year = {2001},
type = {Journal Article}
}

@article{RN145,
author = {Vehtari, Aki and Gelman, Andrew and Gabry, Jonah},
title = {Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC},
title = {{Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC}},
journal = {Statistics and Computing},
volume = {27},
number = {5},
@@ -169,29 +163,26 @@ @article{RN145
1573-1375},
doi = {10.1007/s11222-016-9696-4},
year = {2016},
type = {Journal Article}
}

@article{RN146,
author = {Watanabe, Sumio},
title = {Asymptotic Equivalence of Bayes Cross Validation and Widely Applicable Information Criterion in Singular Learning Theory},
journal = {Journal of machine learning research},
title = {{Asymptotic Equivalence of Bayes Cross Validation and Widely Applicable Information Criterion in Singular Learning Theory}},
journal = {Journal of Machine Learning Research},
volume = {11},
pages = {3571-3594},
year = {2010},
type = {Journal Article},
}

@article{RN147,
author = {Kumar, Ravin and Carroll, Colin and Hartikainen, Ari and Martin, Osvaldo},
title = {ArviZ a unified library for exploratory analysis of Bayesian models in Python},
title = {{ArviZ a unified library for exploratory analysis of Bayesian models in Python}},
journal = {Journal of Open Source Software},
volume = {4},
number = {33},
issn = {2475-9066},
doi = {10.21105/joss.01143},
year = {2019},
type = {Journal Article}
}

@article{harris2020array,
@@ -217,60 +208,60 @@ @article{harris2020array
}

@article{vivo2012bayesian,
title={Bayesian approach for peak detection in two-dimensional chromatography},
title={{Bayesian approach for peak detection in two-dimensional chromatography}},
author={Viv{\'o}-Truyols, Gabriel},
journal={Analytical chemistry},
journal={Analytical Chemistry},
volume={84},
number={6},
pages={2622--2630},
year={2012},
doi={https://doi.org/10.1021/ac202124t},
doi={10.1021/ac202124t},
publisher={ACS Publications}
}

@article{woldegebriel2015probabilistic,
title={Probabilistic model for untargeted peak detection in LC--MS using Bayesian statistics},
title={{Probabilistic model for untargeted peak detection in LC--MS using Bayesian statistics}},
author={Woldegebriel, Michael and Viv{\'o}-Truyols, Gabriel},
journal={Analytical chemistry},
journal={Analytical Chemistry},
volume={87},
number={14},
pages={7345--7355},
year={2015},
doi={https://doi.org/10.1021/acs.analchem.5b01521},
doi={10.1021/acs.analchem.5b01521},
publisher={ACS Publications}
}

@article{briskot2019prediction,
title={Prediction uncertainty assessment of chromatography models using Bayesian inference},
title={{Prediction uncertainty assessment of chromatography models using Bayesian inference}},
author={Briskot, Till and St\"{u}ckler, Ferdinand and Wittkopp, Felix and Williams, Christopher and Yang, Jessica and Konrad, Susanne and Doninger, Katharina and Griesbach, Jan and Bennecke, Moritz and Hepbildikler, Stefan and others},
journal={Journal of Chromatography A},
volume={1587},
pages={101--110},
year={2019},
doi={https://doi.org/10.1016/j.chroma.2018.11.076},
doi={10.1016/j.chroma.2018.11.076},
publisher={Elsevier}
}

@article{yamamoto2021uncertainty,
title={Uncertainty quantification for chromatography model parameters by Bayesian inference using sequential Monte Carlo method},
title={{Uncertainty quantification for chromatography model parameters by Bayesian inference using sequential Monte Carlo method}},
author={Yamamoto, Yota and Yajima, Tomoyuki and Kawajiri, Yoshiaki},
journal={Chemical Engineering Research and Design},
volume={175},
pages={223--237},
year={2021},
doi={https://doi.org/10.1016/j.cherd.2021.09.003},
doi={10.1016/j.cherd.2021.09.003},
publisher={Elsevier}
}

@article{wiczling2016much,
title={How much can we learn from a single chromatographic experiment? A Bayesian perspective},
title={{How much can we learn from a single chromatographic experiment? A Bayesian perspective}},
author={Wiczling, Pawe{\l} and Kaliszan, Roman},
journal={Analytical chemistry},
journal={Analytical Chemistry},
volume={88},
number={1},
pages={997--1002},
year={2016},
doi={https://doi.org/10.1021/acs.analchem.5b03859},
doi={10.1021/acs.analchem.5b03859},
publisher={ACS Publications}
}

@@ -282,7 +273,7 @@ @article{kelly1971estimation
number={10},
pages={1170--1183},
year={1971},
doi={https://doi.org/10.1021/ac60304a011},
doi={10.1021/ac60304a011},
publisher={ACS Publications}
}

@@ -294,6 +285,6 @@ @article{kelly1971application
number={10},
pages={1184--1195},
year={1971},
doi={https://doi.org/10.1021/ac60304a005},
doi={10.1021/ac60304a005},
publisher={ACS Publications}
}
21 changes: 11 additions & 10 deletions paper/literature.bib
@@ -3,7 +3,8 @@ @misc{nutpie
keywords = {Software},
license = {MIT},
title = {{nutpie}},
url = {https://github.com/pymc-devs/nutpie}
url = {https://github.com/pymc-devs/nutpie},
year = {2022},
}

@article{scipy,
@@ -67,7 +68,7 @@ @article{RN173
@article{RN150,
author = {Abril-Pla, O. and Andreani, V. and Carroll, C. and Dong, L. and Fonnesbeck, C. J. and Kochurov, M. and Kumar, R. and Lao, J. and Luhmann, C. C. and Martin, O. A. and Osthege, M. and Vieira, R. and Wiecki, T. and Zinkov, R.},
title = {{PyMC}: a modern, and comprehensive probabilistic programming framework in {P}ython},
journal = {PeerJ Comput Sci},
journal = {PeerJ Computer Science},
volume = {9},
pages = {e1516},
issn = {2376-5992 (Electronic)
@@ -91,7 +92,7 @@ @book{RN162
@article{RN144,
author = {Azzalini, A.},
title = {A class of distributions which includes the normal ones},
journal = {Scand. J. Statist.},
journal = {Scandinavian Journal of Statistics},
volume = {12},
pages = {171-178},
year = {1985},
@@ -112,7 +113,7 @@ @article{RN152
@article{RN153,
author = {Grushka, E.},
title = {{Characterization of exponentially modified Gaussian peaks in chromatography}},
journal = {Anal Chem},
journal = {Analytical Chemistry},
volume = {44},
number = {11},
pages = {1733-8},
@@ -126,7 +127,7 @@ @article{RN153
@article{RN149,
author = {Hemmerich, J. and Noack, S. and Wiechert, W. and Oldiges, M.},
title = {{Microbioreactor Systems for Accelerated Bioprocess Development}},
journal = {Biotechnol J},
journal = {Biotechnology Journal},
volume = {13},
number = {4},
pages = {e1700141},
@@ -140,7 +141,7 @@ @article{RN149
@article{RN148,
author = {Kostov, Y. and Harms, P. and Randers-Eichhorn, L. and Rao, G.},
title = {Low-cost microbioreactor for high-throughput bioprocessing},
journal = {Biotechnol Bioeng},
journal = {Biotechnology and Bioengineering},
volume = {72},
number = {3},
pages = {346-52},
@@ -167,7 +168,7 @@ @article{RN145
@article{RN146,
author = {Watanabe, Sumio},
title = {{Asymptotic Equivalence of Bayes Cross Validation and Widely Applicable Information Criterion in Singular Learning Theory}},
journal = {Journal of machine learning research},
journal = {Journal of Machine Learning Research},
volume = {11},
pages = {3571-3594},
year = {2010},
@@ -209,7 +210,7 @@ @article{vivo2012bayesian
@article{vivo2012bayesian,
title={{Bayesian approach for peak detection in two-dimensional chromatography}},
author={Viv{\'o}-Truyols, Gabriel},
journal={Analytical chemistry},
journal={Analytical Chemistry},
volume={84},
number={6},
pages={2622--2630},
@@ -221,7 +222,7 @@ @article{woldegebriel2015probabilistic
@article{woldegebriel2015probabilistic,
title={{Probabilistic model for untargeted peak detection in LC--MS using Bayesian statistics}},
author={Woldegebriel, Michael and Viv{\'o}-Truyols, Gabriel},
journal={Analytical chemistry},
journal={Analytical Chemistry},
volume={87},
number={14},
pages={7345--7355},
@@ -255,7 +256,7 @@ @article{wiczling2016much
@article{wiczling2016much,
title={{How much can we learn from a single chromatographic experiment? A Bayesian perspective}},
author={Wiczling, Pawe{\l} and Kaliszan, Roman},
journal={Analytical chemistry},
journal={Analytical Chemistry},
volume={88},
number={1},
pages={997--1002},
16 changes: 8 additions & 8 deletions paper/paper.md
@@ -42,10 +42,10 @@ bibliography: literature.bib

A major bottleneck of chromatography-based analytics has been the elusive fully automated identification and integration of peak data without the need of extensive human supervision.
The presented Python package $\texttt{PeakPerformance}$ applies Bayesian inference to chromatographic peak fitting, and provides an automated approach featuring model selection and uncertainty quantification.
Regarding peak acceptance, it improves on vendor software solutions with more sophisticated, multi-layered metrics for decision making based on convergence of the parameter estimation, as well as the uncertainties of peak parameters.
Regarding peak acceptance, it improves on vendor software solutions with more sophisticated, multi-layered metrics for decision making based on convergence of the parameter estimation as well as the uncertainties of peak parameters.
Currently, its application is focused on data from targeted liquid chromatography tandem mass spectrometry (LC-MS/MS), but its design allows for an expansion to other chromatographic techniques and accommodates users with little programming experience by supplying convenience functions and relying on Microsoft Excel for data input and reporting.
$\texttt{PeakPerformance}$ is implemented in Python, its source code is available on [GitHub](https://github.com/JuBiotech/peak-performance), and a through documentation is available under [https://peak-performance.rtfd.io](https://peak-performance.rtfd.io).
It is unit-tested on Linux and Windows and accompanied by documentation as well as example notebooks.
$\texttt{PeakPerformance}$ is implemented in Python, its source code is available on [GitHub](https://github.com/JuBiotech/peak-performance), and a thorough documentation is available under [https://peak-performance.rtfd.io](https://peak-performance.rtfd.io).
It is unit-tested on Linux and Windows and accompanied by example notebooks.

# Statement of need

@@ -90,11 +90,11 @@ __Figure 1:__ Overview of the pre-manufactured data analysis pipeline featured i

Subsequently, the peak analysis pipeline can be started with the function $\texttt{pipeline()}$ from the $\texttt{pipeline}$ module.
Depending on whether the "pre-filtering" option was selected, an optional filtering step will be executed to reject signals where clearly no peak is present before sampling, thus saving computation time.
Upon passing the first filter, a Markov chain Monte Carlo (MCMC) simulation is conducted using a No-U-Turn Sampler (NUTS) [@RN173], preferably - if installed in the Python environment - the nutpie sampler [@nutpie] due to its highly increased performance compared to the default sampler of PyMC.
When a posterior distribution has been obtained, the main filtering step is next in line which checks the convergence of the Markov chains via the potential scale reduction factor [@RN152] or $\hat{R}$ statistic and based on the uncertainty of the determined peak parameters.
Upon passing the first filter, a Markov chain Monte Carlo (MCMC) simulation is conducted using a No-U-Turn Sampler (NUTS) [@RN173], preferably - if installed in the Python environment - the $\texttt{nutpie}$ sampler [@nutpie] due to its highly increased performance compared to the default sampler of PyMC.
When a posterior distribution has been obtained, the main filtering step is next in line checking the convergence of the Markov chains via the potential scale reduction factor [@RN152] or $\hat{R}$ statistic and based on the uncertainty of the determined peak parameters.
If a signal was accepted as a peak, a posterior predictive check is conducted and added to the inference data object resulting from the model simulation.
Regarding the performance of the simulation, in our tests an analysis of a single peaks took 20 s to 30 s and of a double peaks 25 s to 90 s.
This is of course dependent on the power of the computer as well as whether an additional simulation with an increased number of samples needs to be conducted.
Regarding the performance of the simulation, in our tests an analysis of a single peak took 20 s to 30 s and of a double peak 25 s to 90 s.
This is of course dependent on the type of sampler, the power of the computer as well as whether an additional simulation with an increased number of samples needs to be conducted.
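
For illustration, a minimal sketch of the sampling and convergence-filtering steps described above is given below. It uses PyMC and ArviZ directly; the Gaussian peak shape, the synthetic data, the prior choices, and the $\hat{R} < 1.05$ threshold are assumptions made for demonstration and do not reproduce the actual models or internals of $\texttt{PeakPerformance}$'s $\texttt{pipeline()}$.

```python
import arviz as az
import numpy as np
import pymc as pm

# Synthetic stand-in for one time window of raw chromatogram data.
rng = np.random.default_rng(42)
time = np.linspace(24.0, 26.0, 60)
intensity = 80.0 * np.exp(-0.5 * ((time - 25.0) / 0.1) ** 2) + rng.normal(0.0, 2.0, time.size)

# Minimal Gaussian-shaped peak model; the package's actual peak models and priors are more elaborate.
with pm.Model() as peak_model:
    mean = pm.Normal("mean", mu=25.0, sigma=0.5)
    std = pm.HalfNormal("std", sigma=0.5)
    height = pm.HalfNormal("height", sigma=100.0)
    noise = pm.HalfNormal("noise", sigma=10.0)
    mu = height * pm.math.exp(-0.5 * ((time - mean) / std) ** 2)
    pm.Normal("L", mu=mu, sigma=noise, observed=intensity)

    # NUTS sampling with the nutpie sampler (requires nutpie to be installed);
    # drop the argument to fall back to PyMC's default NUTS implementation.
    idata = pm.sample(nuts_sampler="nutpie", chains=4)

# Main filtering idea: check convergence via the potential scale reduction factor (R-hat).
if bool((az.rhat(idata).to_array() < 1.05).all()):
    with peak_model:
        # Append posterior predictive samples to the inference data object.
        pm.sample_posterior_predictive(idata, extend_inferencedata=True)
```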


## Peak fitting results and diagnostic plots
@@ -108,7 +108,7 @@ The posterior plot presents the fit of the intensity function alongside the raw
The first row of Figure 2 presents two such examples where the single peak diagram shows the histidine (His) fragment with a m/z ratio of 110 Da and the double peak diagram the leucine (Leu) and isoleucine (Ile) fragments with a m/z ratio of 86 Da.

![](./Fig4_peak_results.png)
__Figure 2:__ Results plots for a single His peak and a double Leu and Ile peak depicting the peak fit (first row) and the posterior predictive checks (second row) alongside the raw data. The numerical results are listed in table 2.
__Figure 2:__ Results plots for a single His peak and a double Leu and Ile peak depicting the peak fit (first row) and the posterior predictive checks (second row) alongside the raw data. The numerical results are listed in Table 2.

The posterior predictive plots in the second row of Figure 2 are provided for visual posterior predictive checks, namely the comparison of observed and predicted data distribution.
Since a posterior predictive check is based on drawing samples from the likelihood function, the result represents the theoretical range of values encompassed by the model.
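
As an illustrative sketch only, such a visual check can be drawn with ArviZ, assuming an inference data object $\texttt{idata}$ that already contains posterior predictive samples (for example from the sketch above); the number of plotted draws is arbitrary.

```python
import arviz as az
import matplotlib.pyplot as plt

# Overlay the observed intensities with draws from the posterior predictive
# distribution; the observations should fall within the predicted range.
az.plot_ppc(idata, num_pp_samples=100)
plt.show()
```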