diff --git a/README.md b/README.md index d0642e2..1ce6283 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ # SIP -Codes and data to reproduce the results of the paper: +Codes and data to reproduce the results of: -* _Probabilistic performance estimators for computational chemistry methods: Systematic Improvement Probability and Ranking Probability Matrix_, by P. Pernot and A. Savin. WIP... +* P. Pernot and A. Savin (2020) _Probabilistic performance estimators for computational chemistry methods: Systematic Improvement Probability and Ranking Probability Matrix. I. Theory_, [_J. Chem. Phys._ __152__:164108](http://dx.doi.org/10.1063/5.0006202). + +* P. Pernot and A. Savin (2020) _Probabilistic performance estimators for computational chemistry methods: Systematic Improvement Probability and Ranking Probability Matrix. II. Applications_, [_J. Chem. Phys._ __152__:164109](http://dx.doi.org/10.1063/5.0006204). diff --git a/analysis/0-Setup.R b/analysis/0-Setup.R index 6cfcce4..e0be46e 100644 --- a/analysis/0-Setup.R +++ b/analysis/0-Setup.R @@ -15,7 +15,7 @@ for (lib in libs) { } } ## Load packages and generate biblio -repmis::LoadandCite(libs,file='../article/packages.bib') +repmis::LoadandCite(libs) #,file='../article/packages.bib') ## Github package lib = "ErrViewLib" diff --git a/analysis/Appendix_B.R b/analysis/Appendix_B.R index e9d9599..d2064e3 100644 --- a/analysis/Appendix_B.R +++ b/analysis/Appendix_B.R @@ -2,7 +2,7 @@ source('0-Setup.R') caseName = 'AppB' -# Fig. 30 #### +# Fig. I-5 #### ## g-and-h Samples CC nMC = 1000 diff --git a/analysis/Appendix_C.R b/analysis/Appendix_C.R index 916ca73..1af39e8 100644 --- a/analysis/Appendix_C.R +++ b/analysis/Appendix_C.R @@ -167,7 +167,7 @@ stop() # Analysis #### -## Fig. 31 +## Fig. 
I-6 #### source('0-Setup.R') diff --git a/analysis/Appendix_D.R b/analysis/Appendix_D.R index f5ad75d..fc1df17 100644 --- a/analysis/Appendix_D.R +++ b/analysis/Appendix_D.R @@ -62,7 +62,7 @@ b1l = boot(X, statistic = ErrViewLib::q95,R = nMC) b2l = boot(X, statistic = ErrViewLib::q95hd,R = nMC) -# Fig. 32 #### +# Fig. I-7 #### png( file = paste0(figRepo,caseName,'_Compare_Q95.png'), width = 1.75*gPars$reso, @@ -332,7 +332,7 @@ for(score in scores) { # save(resp,file = 'scoresBS.Rda') -# Fig. 33 #### +# Fig. I-8 #### ifig=0 png( file = paste0(figRepo,caseName,'_scoresBS.png'), diff --git a/analysis/BOR2019.R b/analysis/BOR2019.R index 75c07ca..87aba0d 100644 --- a/analysis/BOR2019.R +++ b/analysis/BOR2019.R @@ -100,7 +100,7 @@ sink() # Figures #### eps = 0.2 # eV : uncertainty level ("a few tenths of eV") -# Fig. 3 #### +# Fig. I-3 #### ifig =1 png(file=paste0(figRepo,caseName,'_compareECDF.png'), width=gPars$reso,height=gPars$reso) @@ -130,7 +130,7 @@ ErrViewLib::plotDeltaCDF( dev.off() ### -# Fig. 8 #### +# Fig. II-5 #### png( file = paste0(figRepo, caseName,'_Cormat_Errors_Spearman.png'), width = 13/12*gPars$reso, @@ -141,7 +141,7 @@ ErrViewLib::plotCorMat(cErr, order = 'hclust', gPars=gPars) dev.off() ### -# Fig. 9 #### +# Fig. II-6 #### ifig =1 png(file=paste0(figRepo,caseName,'_compareECDF2.png'), width=gPars$reso,height=gPars$reso) @@ -171,7 +171,7 @@ ErrViewLib::plotDeltaCDF( dev.off() #### -# Fig. 10 #### +# Fig. II-7 #### png( file = paste0(figRepo, caseName,'_SIPHeatmap.png'), width = 13/12*gPars$reso, @@ -181,7 +181,7 @@ ErrViewLib::plotSIPMat(statBS$sip, gPars = gPars) dev.off() ### -# Fig. 11 #### +# Fig. II-8 #### ifig = 0 for (score in c('mue','q95hd','msip')) for (type in c('levels','ci')[1]) { diff --git a/analysis/CAL2019.R b/analysis/CAL2019.R index 3edeed2..abf5703 100644 --- a/analysis/CAL2019.R +++ b/analysis/CAL2019.R @@ -60,7 +60,7 @@ sink() # Figures #### -# Fig. 14 #### +# Fig. 
II-11 #### # Selection of Delta CDF figs dfas = unique(sapply(methList,function(x) strsplit(x,'-D')[[1]][1])) @@ -85,7 +85,7 @@ for(dfa in dfas[c(4,5,10)]) { } ### -# Fig. 15 #### +# Fig. II-12 #### # D4-ATM and D3 sel1 = sort(c(seq(1,ncol(Errors),by=3),seq(3,ncol(Errors),by=3))) ifig = 0 diff --git a/analysis/DAS2019.R b/analysis/DAS2019.R index 76d74d9..f3e283b 100644 --- a/analysis/DAS2019.R +++ b/analysis/DAS2019.R @@ -76,20 +76,9 @@ sink() # Figures #### -# Fig. 19 #### -png(filename = paste0(figRepo,caseName,'_ParPlot.png'), - width=reso,height=reso) -ErrViewLib::plotParallel( - Errors, - rescale = TRUE, - labels = systems, - outliers = 'no', - lab.thresh = 1, - gPars=gPars) -dev.off() -### -# Fig. 20 #### + +# Fig. II-16 #### ifig=1 png( file = paste0(figRepo, caseName,'_CorrMat_Errors_Spearman.png'), @@ -173,7 +162,21 @@ ErrViewLib::plotCorMat( dev.off() ### -# Fig. 21 #### +# Fig. II-17 #### +png(filename = paste0(figRepo,caseName,'_ParPlot.png'), + width=reso,height=reso) +ErrViewLib::plotParallel( + Errors, + rescale = TRUE, + labels = systems, + outliers = 'no', + outLabCex = 3, + lab.thresh = 1, + gPars=gPars) +dev.off() +### + +# Fig. II-18 #### ifig=1 png( file = paste0(figRepo, caseName,'_SIPHeatmap_Pruned.png'), @@ -209,7 +212,7 @@ ErrViewLib::plotDeltaCDF( dev.off() ### -# Fig. 22 #### +# Fig. II-19 #### ifig = 0 for (score in c('mue','q95hd','msip')) for (type in c('levels','ci')[1]) { diff --git a/analysis/JEN2018.R b/analysis/JEN2018.R index 0ad7f77..21bb0ac 100644 --- a/analysis/JEN2018.R +++ b/analysis/JEN2018.R @@ -50,7 +50,7 @@ sink() # Figures #### -# Fig. 16 #### +# Fig. II-13 #### ifig=1 png( file = paste0(figRepo, caseName,'_CorrMat_Errors_Spearman.png'), @@ -94,7 +94,7 @@ ErrViewLib::plotCorMat( dev.off() ### -# Fig. 17 #### +# Fig. II-14 #### ifig=1 png(file=paste0(figRepo,caseName,'_compareECDF.png'), width=gPars$reso,height=gPars$reso) @@ -146,7 +146,7 @@ ErrViewLib::plotDeltaCDF( dev.off() ### -# Fig. 18 #### +# Fig. 
II-15 #### ifig = 0 for (score in c('mue','q95hd','msip')) for (type in c('levels','ci')[1]) { diff --git a/analysis/NAR2019.R b/analysis/NAR2019.R index cca27bc..f75bd2f 100644 --- a/analysis/NAR2019.R +++ b/analysis/NAR2019.R @@ -54,7 +54,7 @@ sink() # Figures #### -# Fig. 12 #### +# Fig. II-9 #### cex.lab= 1.25 ifig=1 png( @@ -102,7 +102,7 @@ ErrViewLib::plotCorMat( dev.off() ### -# Fig. 13 #### +# Fig. II-10 #### ifig=1 png(file=paste0(figRepo,caseName,'_compareECDF.png'), width=gPars$reso,height=gPars$reso) diff --git a/analysis/PER2018.R b/analysis/PER2018.R index b61925a..943dfb8 100644 --- a/analysis/PER2018.R +++ b/analysis/PER2018.R @@ -33,7 +33,6 @@ gParsExt$cols = colsExt gParsExt$cols_tr = colsExt_tr gParsExt$cols_tr2 = colsExt_tr2 - # Generate stats #### statBS = ErrViewLib::estBS1(Errors,props = c("mue", "q95hd")) @@ -55,7 +54,7 @@ sink() # Figures #### -# Fig 2 #### +# Fig. II-2 #### png( file = paste0(figRepo, caseName,'_SIPHeatmap.png'), width = 13/12*gPars$reso, @@ -65,54 +64,8 @@ ErrViewLib::plotSIPMat(statBS$sip, gPars = gPars) dev.off() ### -# Figs 4 and 7 #### -ifig=0 -gParLoc = gPars -for (score in c('mue')) - for (type in c('levels','ci')) { - png( - file = paste0(figRepo, caseName,'_figRanks_',score,'_',type,'.png'), - width = 1.5*gPars$reso, - height = 1.5*gPars$reso - ) - ifig = ifig + 1 - cex.lab = 1 - if(type == 'ci'){ - cex.lab = 1.1 - gParLoc$cex = 1.2*gPars$cex - } - ErrViewLib::plotRankMat( - E = Errors, - score = score, - type = type, - label = ifig, - cex.lab = cex.lab, - gPars = gParLoc - ) - dev.off() - } - -ifig=0 -for (score in c('q95hd','msip')) - for (type in c('levels')) { - png( - file = paste0(figRepo, caseName,'_figRanks_',score,'_',type,'.png'), - width = 1.5*gPars$reso, - height = 1.5*gPars$reso - ) - ifig = ifig + 1 - ErrViewLib::plotRankMat( - E = Errors, - score = score, - type = type, - label = ifig, - gPars = gPars - ) - dev.off() - } -### -# Figs 4 SPLIT #### +# Fig. 
II-4 #### ifig=0 gParLoc = gPars for (score in c('mue','q95hd','msip')) @@ -140,7 +93,7 @@ for (score in c('mue','q95hd','msip')) } ### -# Fig. 5 #### +# Fig. II-1 #### cex.lab = 1.25 png( file = paste0(figRepo, caseName,'_CorrMat_Errors_Spearman.png'), @@ -208,7 +161,7 @@ h = hist(X[lower.tri(X)],breaks = seq(-1.1,1.1,by=0.2), dev.off() ### -# Fig. 6 #### +# Fig. II-3 #### ifig =1 png(file=paste0(figRepo,caseName,'_compareECDF.png'), width=gPars$reso,height=gPars$reso) diff --git a/analysis/THA2015.R b/analysis/THA2015.R index e1dd4f0..5d71637 100644 --- a/analysis/THA2015.R +++ b/analysis/THA2015.R @@ -93,7 +93,7 @@ sink() # Figures #### cex.lab = 1.0 -# Fig. 23 #### +# Fig. II-20 #### ifig = 1 png(file = paste0(figRepo, caseName,'_CorrMat_Errors.png'), width = 1.5*gPars$reso, @@ -175,7 +175,7 @@ ErrViewLib::plotCorMat( dev.off() ### -# Fig.25 #### +# Fig. II-22 #### sel = c(1,2,4,7) ifig = 1 png(file=paste0(figRepo,caseName,'_compareECDF_Pruned.png'), @@ -207,7 +207,7 @@ ErrViewLib::plotUncEcdf( dev.off() ### -# Fig. 26(a) #### +# Fig. II-23(a) #### ifig = 1 png(file = paste0(figRepo, caseName,'_SIPHeatmap_Pruned.png'), width = 13/12*gPars$reso, @@ -217,7 +217,7 @@ dev.off() ### -# Fig. 27 top-middle #### +# Fig. II-24-(a-f) #### ifig = 0 for (score in c('mue','q95hd','msip')) for (type in c('levels','ci')[1]) { diff --git a/analysis/WU2015.R b/analysis/WU2015.R index a6e3db7..154da16 100644 --- a/analysis/WU2015.R +++ b/analysis/WU2015.R @@ -60,7 +60,7 @@ sink() # Figures #### -# Fig. 1 #### +# Fig. I-1 #### cex.lab = 1.0 ifig = 1 png(file = paste0(figRepo, caseName,'_CorrMat_Data_Spearman.png'), @@ -87,7 +87,7 @@ ErrViewLib::plotCorMat( gPars = gPars) dev.off() -# Fig. 24 #### +# Fig. II-21 #### cex.lab=1.0 ifig = 1 png(file = paste0(figRepo, caseName,'_CorrMat_Errors_Spearman.png'), @@ -131,7 +131,7 @@ ErrViewLib::plotCorMat( dev.off() ### -# Fig. 26(b) #### +# Fig. 
II-23(b) #### png( file = paste0(figRepo, caseName,'_SIPHeatmap.png'), width = 13/12*gPars$reso, @@ -143,7 +143,7 @@ ErrViewLib::plotSIPMat(statBS$sip, dev.off() ### -# Fig. 27 bottom #### +# Fig. II-24(g-i) #### cex.lab=1 ifig = 6 for (score in c('mue','q95hd','msip')) diff --git a/analysis/ZAS2019.R b/analysis/ZAS2019.R index 791f928..26a38f6 100644 --- a/analysis/ZAS2019.R +++ b/analysis/ZAS2019.R @@ -60,7 +60,7 @@ sink() # Figures #### -# Fig. 28 #### +# Fig. II-25 #### cex.lab = 1.0 ifig=1 png( @@ -108,7 +108,7 @@ ErrViewLib::plotCorMat( dev.off() ### -# Fig. 29 #### +# Fig. II-26 #### ifig=1 png(file=paste0(figRepo,caseName,'_compareECDF.png'), width= gPars$reso,height= gPars$reso) diff --git a/analysis/sessionInfo.txt b/analysis/sessionInfo.txt index b9175da..20774dc 100644 --- a/analysis/sessionInfo.txt +++ b/analysis/sessionInfo.txt @@ -6,34 +6,58 @@ Matrix products: default BLAS: /usr/lib/openblas-base/libblas.so.3 LAPACK: /usr/lib/libopenblasp-r0.2.19.so +Random number generation: + RNG: Mersenne-Twister + Normal: Inversion + Sample: Rounding + attached base packages: -[1] grid stats graphics grDevices utils datasets -[7] methods base +[1] grid stats graphics grDevices +[5] utils datasets methods base other attached packages: - [1] ErrViewLib_1.1 distillery_1.0-6 WRS_0.36 - [4] WRS2_1.0-0 dplyr_0.8.4 ggplot2_3.2.1 - [7] sessioninfo_1.1.1 repmis_0.5 corrplot_0.84 -[10] boot_1.3-22 rlist_0.4.6.1 inlmisc_0.4.9 + [1] ErrViewLib_1.1 distillery_1.0-6 + [3] WRS_0.36 WRS2_1.0-0 + [5] dplyr_0.8.4 ggplot2_3.3.0 + [7] sessioninfo_1.1.1 repmis_0.5 + [9] corrplot_0.84 boot_1.3-22 +[11] rlist_0.4.6.1 inlmisc_0.4.9 [13] mixtools_1.2.0 xtable_1.8-4 loaded via a namespace (and not attached): - [1] tidyselect_1.0.0 xfun_0.12 purrr_0.3.3 - [4] kernlab_0.9-29 splines_3.6.1 lattice_0.20-38 - [7] mc2d_0.1-18 colorspace_1.4-1 htmltools_0.4.0 -[10] yaml_2.2.1 survival_3.1-8 rlang_0.4.4 -[13] R.oo_1.23.0 pillar_1.4.3 glue_1.3.1 -[16] withr_2.1.2 R.utils_2.9.2 sp_1.3-2 -[19] 
segmented_1.1-0 R.cache_0.14.0 lifecycle_0.1.0 -[22] plyr_1.8.5 munsell_0.5.0 gtable_0.3.0 -[25] raster_3.0-12 R.methodsS3_1.8.0 mvtnorm_1.0-12 -[28] codetools_0.2-16 evaluate_0.14 knitr_1.28 -[31] fansi_0.4.1 Rcpp_1.0.3 backports_1.1.5 -[34] scales_1.1.0 checkmate_2.0.0 farver_2.0.3 -[37] packrat_0.5.0 digest_0.6.24 rgdal_1.4-8 -[40] cli_2.0.1 tools_3.6.1 magrittr_1.5 -[43] lazyeval_0.2.2 tibble_2.1.3 crayon_1.3.4 -[46] pkgconfig_2.0.3 MASS_7.3-51.5 Matrix_1.2-17 -[49] data.table_1.12.8 reshape_0.8.8 assertthat_0.2.1 -[52] rmarkdown_2.1 httr_1.4.1 rstudioapi_0.11 -[55] R6_2.4.1 igraph_1.2.4.2 compiler_3.6.1 + [1] httr_1.4.1 pkgload_1.0.2 + [3] splines_3.6.1 R.utils_2.9.2 + [5] assertthat_0.2.1 sp_1.4-1 + [7] remotes_2.1.1 yaml_2.2.1 + [9] pillar_1.4.3 backports_1.1.5 +[11] lattice_0.20-38 glue_1.3.2 +[13] digest_0.6.25 checkmate_2.0.0 +[15] colorspace_1.4-1 htmltools_0.4.0 +[17] Matrix_1.2-17 R.oo_1.23.0 +[19] plyr_1.8.5 pkgconfig_2.0.3 +[21] devtools_2.2.2 raster_3.0-12 +[23] purrr_0.3.3 mvtnorm_1.1-0 +[25] scales_1.1.0 processx_3.4.2 +[27] tibble_3.0.0 farver_2.0.3 +[29] usethis_1.5.1 ellipsis_0.3.0 +[31] withr_2.1.2 cli_2.0.2 +[33] survival_3.1-8 magrittr_1.5 +[35] crayon_1.3.4 memoise_1.1.0 +[37] evaluate_0.14 ps_1.3.2 +[39] R.methodsS3_1.8.0 fs_1.3.1 +[41] fansi_0.4.1 R.cache_0.14.0 +[43] MASS_7.3-51.5 segmented_1.1-0 +[45] pkgbuild_1.0.6 tools_3.6.1 +[47] data.table_1.12.8 prettyunits_1.1.1 +[49] lifecycle_0.2.0 kernlab_0.9-29 +[51] munsell_0.5.0 callr_3.4.3 +[53] compiler_3.6.1 rlang_0.4.5 +[55] rstudioapi_0.11 igraph_1.2.5 +[57] rmarkdown_2.1 testthat_2.3.2 +[59] gtable_0.3.0 codetools_0.2-16 +[61] reshape_0.8.8 R6_2.4.1 +[63] knitr_1.28 rgdal_1.4-8 +[65] mc2d_0.1-18 rprojroot_1.3-2 +[67] desc_1.2.0 Rcpp_1.0.4 +[69] vctrs_0.2.4 tidyselect_1.0.0 +[71] xfun_0.12 diff --git a/article/Old/article.lyx b/article/Old/article.lyx deleted file mode 100644 index 11028d3..0000000 --- a/article/Old/article.lyx +++ /dev/null @@ -1,26457 +0,0 @@ -#LyX 2.3 created this 
file. For more info see http://www.lyx.org/ -\lyxformat 544 -\begin_document -\begin_header -\save_transient_properties true -\origin unavailable -\textclass article -\begin_preamble -\usepackage{datetime} -\usepackage{refstyle} -\usepackage{url} -\usepackage[title,toc,page,header]{appendix} -\usepackage[nosort,super]{cite} - -\usepackage{todonotes} -\newcommand{\PP}[1]{\todo[inline,color=green!20!white]{\textbf{Pascal:} #1}} - -\pdfminorversion=5 -\pdfcompresslevel=9 -\pdfobjcompresslevel=2 -\end_preamble -\use_default_options false -\maintain_unincluded_children false -\language english -\language_package none -\inputencoding utf8 -\fontencoding T1 -\font_roman "lmodern" "default" -\font_sans "default" "default" -\font_typewriter "default" "default" -\font_math "auto" "auto" -\font_default_family default -\use_non_tex_fonts false -\font_sc false -\font_osf false -\font_sf_scale 100 100 -\font_tt_scale 100 100 -\use_microtype true -\use_dash_ligatures true -\graphics default -\default_output_format default -\output_sync 0 -\bibtex_command default -\index_command default -\paperfontsize 12 -\spacing onehalf -\use_hyperref true -\pdf_title "Comparison of methods" -\pdf_bookmarks true -\pdf_bookmarksnumbered false -\pdf_bookmarksopen false -\pdf_bookmarksopenlevel 1 -\pdf_breaklinks true -\pdf_pdfborder true -\pdf_colorlinks true -\pdf_backref false -\pdf_pdfusetitle false -\pdf_quoted_options "citecolor =blue, linkcolor = blue, urlcolor = blue" -\papersize default -\use_geometry true -\use_package amsmath 2 -\use_package amssymb 2 -\use_package cancel 0 -\use_package esint 1 -\use_package mathdots 0 -\use_package mathtools 0 -\use_package mhchem 2 -\use_package stackrel 0 -\use_package stmaryrd 0 -\use_package undertilde 0 -\cite_engine basic -\cite_engine_type default -\biblio_style plain -\use_bibtopic false -\use_indices false -\paperorientation portrait -\suppress_date false -\justification true -\use_refstyle 0 -\use_minted 0 -\index Index -\shortcut idx 
-\color #008000 -\end_index -\leftmargin 2cm -\topmargin 2cm -\rightmargin 2cm -\bottommargin 2cm -\secnumdepth 3 -\tocdepth 3 -\paragraph_separation indent -\paragraph_indentation default -\is_math_indent 0 -\math_numbering_side default -\quotes_style english -\dynamic_quotes 0 -\papercolumns 1 -\papersides 1 -\paperpagestyle default -\tracking_changes false -\output_changes false -\html_math_output 0 -\html_css_as_file 0 -\html_be_strict false -\end_header - -\begin_body - -\begin_layout Title - -\series bold -\color blue -Probabilistic performance estimators for computational chemistry methods: -\begin_inset Newline newline -\end_inset - - Systematic Improvement Probability and -\begin_inset Newline newline -\end_inset - - Ranking Probability Matrix -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout - -\series bold -\color blue -On the comparison of error sets and -\begin_inset Newline newline -\end_inset - -their statistics in benchmarking studies -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Author -Pascal PERNOT -\begin_inset Newline newline -\end_inset - -Institut de Chimie Physique, UMR8000, -\begin_inset Newline newline -\end_inset - -CNRS, Université Paris-Saclay, -\begin_inset Newline newline -\end_inset - -91405 Orsay, France -\begin_inset Newline newline -\end_inset - -email: Pascal.Pernot@universite-paris-saclay.fr -\begin_inset Newline newline -\end_inset - - -\begin_inset Newline newline -\end_inset - -Andreas SAVIN -\begin_inset Newline newline -\end_inset - -Laboratoire de Chimie Théorique, -\begin_inset Newline newline -\end_inset - -CNRS and UPMC Université Paris 06, -\begin_inset Newline newline -\end_inset - -Sorbonne Universités, 75252 Paris, France -\begin_inset Newline newline -\end_inset - -email: Andreas.Savin@lct.jussieu.fr -\begin_inset Newline newline -\end_inset - - -\begin_inset space ~ -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset Newpage clearpage -\end_inset - - 
-\end_layout - -\begin_layout Abstract -The comparison of benchmark error sets is an essential tool for the evaluation - of theories in computational chemistry. - The standard ranking of methods by their Mean Absolute Error is unsatisfactory - for several reasons linked to the non-normality of the error distributions - and the presence of underlying trends. - Complementary statistics have recently been proposed to palliate such deficienc -ies, such as quantiles of the absolute errors distribution or the mean predictio -n uncertainty. - We introduce here a new score, the systematic improvement probability (SIP), - based on the direct pair-wise comparison of absolute errors, bypassing - the need of other descriptive statistics. - Independently of the chosen scoring rule, the uncertainty of the statistics - due to the incompleteness of the benchmark data sets is also generally - overlooked. - However, this uncertainty is essential to appreciate the robustness of - score-based rankings. - In the present article, we develop two indicators based on robust statistics - to address this problem: -\begin_inset Formula $P_{inv}$ -\end_inset - -, the inversion probability between two values of a statistic, and -\begin_inset Formula $\mathbf{P}_{r}$ -\end_inset - -, the ranking probability matrix. - We demonstrate also the essential contribution of the correlations between - error sets in these scores comparisons. - These methods are applied to eight data sets extracted from the recent - benchmarking literature. 
-\end_layout - -\begin_layout Standard -\begin_inset Newpage clearpage -\end_inset - - -\begin_inset CommandInset toc -LatexCommand tableofcontents - -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset Newpage newpage -\end_inset - - -\end_layout - -\begin_layout Section -Introduction -\end_layout - -\begin_layout Standard -Benchmarks are a central tool for the evaluation of new theories/methods - in quantum chemistry -\begin_inset CommandInset citation -LatexCommand cite -key "Mata2017" -literal "false" - -\end_inset - -. - Amongst many possible metrics -\begin_inset CommandInset citation -LatexCommand citep -key "Civalleri2012" -literal "false" - -\end_inset - -, the most common benchmarking statistics are the mean unsigned error (MUE/MAD/M -AE...), mean signed error (MSE), root mean squared error (RMSE) and root mean - squared deviation (RMSD). - The explicit definition of these scores is given in Ref. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - -. - In a vast majority of benchmark studies, the MUE, or some variant of it, - is used to compare methods performance. -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -If the ranking of methods is precious for developers who want to assess - the impact of their latest methods, it might be of less interest for final - users. - In particular, it does not generally offer the choice and criteria for - picking another method than the 'best one'. -\end_layout - -\end_inset - - Recently -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - -, we proposed a more informative probabilistic score, the 95th percentile - of the absolute errors distribution ( -\begin_inset Formula $Q_{95}$ -\end_inset - -). 
-\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -We argued that -\begin_inset Formula $Q_{95}$ -\end_inset - - is more informative than the MUE, because the latter provides probabilistic - information only if the errors distribution is zero-centered normal, a - rather unlikely occurrence. - In contrast, -\begin_inset Formula $Q_{95}$ -\end_inset - - gives us the error level that one has only 5 -\begin_inset space \thinspace{} -\end_inset - -% chance to exceed in a new calculation (provided that the reference dataset - is representative of the systems for which predictions are sought). - The end-users can easily check if this threshold meets their expectations. - We recently realized that the 90th percentile (noted -\begin_inset Formula $P_{90}$ -\end_inset - -) has been used by Thakkar and colleagues in the same spirit -\begin_inset CommandInset citation -LatexCommand cite -key "Thakkar2015,Wu2015b" -literal "false" - -\end_inset - -. - We think -\begin_inset Formula $Q_{95}$ -\end_inset - - is more appropriate because of its direct link to the enlarged uncertainty - -\begin_inset Formula $u_{95}$ -\end_inset - - recommended in the thermochemistry literature -\begin_inset CommandInset citation -LatexCommand cite -key "Ruscic2014,Pernot2018" -literal "false" - -\end_inset - -. -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -Whichever the statistic used, the question remains of the robustness of - such scores and rankings with respect to the choice of the reference dataset. - One easily conceives that the values of these statistics change unpredictably - when one adds or removes points in the dataset. - Benchmarks implicitly assume that the error sets are representative samples - of unknown distributions characterizing model errors for each method – - the more systems in the dataset, the best the approximation of the underlying - distributions. 
- The quest for large datasets incurs heavy computer charges to perform benchmark -s, and there is also a trend to reduce this burden by looking for small, - optimally representative, datasets -\begin_inset CommandInset citation -LatexCommand cite -key "Gould2018,Morgante2019" -literal "false" - -\end_inset - -. - Besides, there are several properties for which the reference data are - rather sparse, leading to rather small datasets. - Another trend, enhanced by the development of machine learning is to replace - experimental values by gold standard calculations, with limitations on - the size of accessible systems -\begin_inset CommandInset citation -LatexCommand cite -key "Ramakrishnan2015,Zaspel2019" -literal "false" - -\end_inset - -. - As the estimated values of the statistics and their uncertainties depend - on the size of the dataset, it is important to assess this size effect - and its impact on statistics comparison and ranking. - -\color red - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout - -\series bold -\color red -Comment on (non-)randomness of error sets (Andreas, any idea ?). 
-\end_layout - -\begin_layout Itemize - -\color red -reference data not random, but typically selected as representative of a - larger population -\end_layout - -\begin_layout Itemize - -\color red -error sets can be biased, should ideally be corrected, then unpredictability - remains -\end_layout - -\begin_layout Itemize - -\color red -in the limit of an exact method, error sets dominated by random exptl errors -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -This question has been considered recently by Proppe and Reiher -\begin_inset CommandInset citation -LatexCommand cite -key "Proppe2017" -literal "false" - -\end_inset - -, who used bootstrapping to assess the impact of dataset size and reference - data uncertainty on the first place in an intercomparison of M -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash - -\begin_inset Quotes erd -\end_inset - -o -\end_layout - -\end_inset - -ssbauer isomer shifts estimated by a dozen of DFAs. - They concluded that for their dataset of -\begin_inset Formula $N=39$ -\end_inset - - values, at least three methods were competing for the first place, with - a slight probabilistic advantage for PBE0. - This is a very interesting contribution to the quality assessment of benchmarki -ng tools. - We recently considered another approach to this problem by defining an - inversion probability -\begin_inset Formula $P_{inv}$ -\end_inset - - for the ranking of two methods -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - -. - Our definition, which was based on the assumption of a normal distribution - of statistics differences and neglected error sets correlations, deserves - a more general setup. 
- -\end_layout - -\begin_layout Standard -In the present study, we revisit the ranking uncertainty problem along several - complementary lines: -\end_layout - -\begin_layout Enumerate -we consider the statistical significance of the difference between two values - of a statistic: it depends both on the uncertainty on the estimated values, - which is notably influenced by the dataset size, and on the correlation - between these values, which -\color red -is due in a large part to the use of a common reference dataset -\color inherit - -\begin_inset CommandInset citation -LatexCommand cite -key "Nicholls2016" -literal "false" - -\end_inset - -. - A few specific points have also to be considered: the non-normality of - the error sets distributions, the small size of some datasets, the uncertainty - on reference data, and some properties of quantiles estimators. - -\end_layout - -\begin_layout Enumerate -we define a ranking probability matrix -\begin_inset Formula $\mathrm{P}_{r}$ -\end_inset - -, generalizing the proposition of Proppe and Reiher -\begin_inset CommandInset citation -LatexCommand cite -key "Proppe2017" -literal "false" - -\end_inset - -, which enables us to propose efficient visual assessments of the robustness - of rankings. -\end_layout - -\begin_layout Enumerate -we introduce a new statistic (the systematic improvement probability, SIP) - that conveys the proportion of systems in the benchmark data set for which - one method has smaller absolute errors than the other, and the expected - gain or loss when switching between methods. -\end_layout - -\begin_layout Standard -\begin_inset VSpace defskip -\end_inset - - -\end_layout - -\begin_layout Standard -In the next section, we consider the uncertainty sources impacting the values - of benchmarking statistics (scores) and we present the tools best adapted - to estimate the uncertainty on statistics and to compare them. 
- These methods are then validated on several datasets taken from the recent - benchmarking literature and covering a wide range of dataset sizes. - The discussion considers the impact of these observations on the benchmarking - practice and proposes several suggestions on their reporting, as well as - on the best practice to share benchmark data. -\end_layout - -\begin_layout Section -Statistical methods -\end_layout - -\begin_layout Subsection -Error sets, their uncertainty and correlation -\begin_inset CommandInset label -LatexCommand label -name "subsec:Error-sets,-their" - -\end_inset - - -\end_layout - -\begin_layout Standard -Benchmarking of a method -\begin_inset Formula $M$ -\end_inset - - is based on the statistical analysis of its error set ( -\begin_inset Formula $E_{M}=\left\{ e_{i}(M)\right\} _{i=1}^{N}$ -\end_inset - -), based on a set of -\begin_inset Formula $N$ -\end_inset - - calculated ( -\begin_inset Formula $C_{M}=\left\{ c_{i}(M)\right\} _{i=1}^{N}$ -\end_inset - -) and reference data ( -\begin_inset Formula $R=\left\{ r_{i}\right\} _{i=1}^{N}$ -\end_inset - -), where -\begin_inset Formula -\begin{equation} -e_{i}(M)=r_{i}-c_{i}(M)\label{eq:errors-def} -\end{equation} - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Uncertainty. -\end_layout - -\begin_layout Standard -As the reference data or even the calculated values can be uncertain, one - should consider that the error sets contain uncertain values when estimating - and comparing statistics. 
- Experimental or computational uncertainties being typically estimated by - standard deviations, one can use the method of combination of variances - to get the uncertainty on the errors -\begin_inset CommandInset citation -LatexCommand cite -key "GUM" -literal "false" - -\end_inset - -, -\begin_inset Formula -\begin{equation} -u(e_{i})=\sqrt{u(r_{i})^{2}+u(c_{i})^{2}}\label{eq:ue-def} -\end{equation} - -\end_inset - -where -\begin_inset Formula $u(x)$ -\end_inset - - is the uncertainty on -\begin_inset Formula $x$ -\end_inset - -. - This formula assumes that the individual errors on the reference data and - calculated values are uncorrelated. - For an experimental reference value -\begin_inset Formula $r_{i}$ -\end_inset - -, -\begin_inset Formula $u(r_{i})$ -\end_inset - - would typically be a measurement uncertainty. - For a computed reference value -\begin_inset Formula $r_{i}$ -\end_inset - - and for a calculated value -\begin_inset Formula $c_{i}$ -\end_inset - -, uncertainty might come from numerical uncertainty due to the use of finite - precision arithmetics and discretization errors -\begin_inset CommandInset citation -LatexCommand cite -key "Janes2011,Cances2017" -literal "false" - -\end_inset - -, statistical uncertainty ( -\emph on -e.g. -\emph default -, for Monte Carlo methods -\begin_inset CommandInset citation -LatexCommand cite -key "Reynolds_1982,Cailliez2011" -literal "false" - -\end_inset - -), parametric uncertainty ( -\emph on -e.g. -\emph default -, for calibrated parametric methods -\begin_inset CommandInset citation -LatexCommand cite -key "Mortensen2005,Cailliez2011,Pernot2017b" -literal "false" - -\end_inset - -), or calibration of computational protocols -\begin_inset CommandInset citation -LatexCommand cite -key "Bakowies2019,Bakowies2020" -literal "false" - -\end_inset - -. -\end_layout - -\begin_layout Standard -We consider here deterministic computational chemistry methods with assumed - low and controlled arithmetic uncertainty. 
- The uncertainty on errors is then equal to the reference data uncertainty - -\begin_inset Formula $u(e_{i})\equiv u(r_{i})$ -\end_inset - -. - For the sake of generality, the -\begin_inset Formula $u(e_{i})$ -\end_inset - - notation is preserved in the following. -\end_layout - -\begin_layout Paragraph -Correlation. -\end_layout - -\begin_layout Standard -Let us consider a set of -\begin_inset Formula $K$ -\end_inset - - methods -\begin_inset Formula $\left\{ M_{i}\right\} _{i=1}^{K}$ -\end_inset - -. - The covariance of the error sets for two method can be decomposed as -\begin_inset Formula -\begin{align} -\mathrm{cov}(E_{i},E_{j}) & =\mathrm{cov}(R-C_{i},R-C_{j})\\ - & =\mathrm{var}(R)+\mathrm{cov}(C_{i},C_{j})-\mathrm{cov}(R,C_{i})-\mathrm{cov}(R,C_{j}) -\end{align} - -\end_inset - -where, for brevity, we use shortened notations such as -\begin_inset Formula $E_{i}\equiv E_{M_{i}}$ -\end_inset - -. - It is not possible to predict the sign and amplitude of -\begin_inset Formula $\mathrm{cov}(E_{i},E_{j})$ -\end_inset - - from this decomposition, but a few considerations might be helpful: -\end_layout - -\begin_layout Itemize - -\color teal -when comparing computational chemistry methods, it is very likely that their - prediction sets are strongly positively correlated (covariant). - It is also very likely that the predictions of good methods have a strong - positive covariance with the reference data, if the latter are not dominated - by measurement errors. - Besides, one can expect that the variance of the reference data is of the - same order (possibly larger if there are notable experimental errors) as - the variance/covariances of the predictions. - So, in a typical comparison scenario, -\begin_inset Formula $\mathrm{cov}(E_{i},E_{j})$ -\end_inset - - results from the compensation of terms with similar magnitudes, and one - should not expect a null covariance of error sets. 
-\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout - -\color red -even if the predictions of two methods are statistically independent ( -\begin_inset Formula $\mathrm{cov}(C_{1},C_{2})=0$ -\end_inset - -), it is likely (and desirable) that they have positive correlations with - the reference data. - -\begin_inset Formula -\begin{align*} -cov(R,C_{i}) & \le\sqrt{var(R)var(C_{i})}\\ -cov(R,C_{j}) & \le\sqrt{var(R)var(C_{j})}\\ -cov(E_{i},E_{j}) & \ge var(R)-\sqrt{var(R)}(\sqrt{(var(C_{i})}+\sqrt{(var(C_{i})}) -\end{align*} - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Itemize -if reference data uncertainties are larger than prediction errors, the covarianc -e should be dominated by -\begin_inset Formula $\mathrm{var}(R)$ -\end_inset - -, and all error sets should be strongly positively correlated. -\end_layout - -\begin_layout Standard -In the following case studies (Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Examples" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), we report and analyze the correlation coefficients between error sets - (normalized covariances) -\begin_inset Formula -\begin{align} -\mathrm{corr}(E_{i},E_{j}) & =\frac{\mathrm{cov}(E_{i},E_{j})}{\sigma_{E_{i}}\sigma_{E_{j}}} -\end{align} - -\end_inset - -where -\begin_inset Formula $\sigma_{E_{i}}$ -\end_inset - -is the standard deviation of the error set -\begin_inset Formula $E_{i}$ -\end_inset - -, assumed finite. - We will show through case studies that the correlation matrix contains - relevant information on the quality of datasets and the proximity of methods. -\end_layout - -\begin_layout Paragraph -Representation. 
-\end_layout - -\begin_layout Standard - -\color teal -Correlation matrices can be represented by combining a color scheme and - an ellipse model -\begin_inset CommandInset citation -LatexCommand cite -key "Murdoch1996" -literal "false" - -\end_inset - - (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:cmat-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), such that a white circle stands for a null correlation, a blue right-slanted - ellipse for a positive correlation and a red left-slanted ellipse for a - negative one. - The larger the absolute value of the correlation, the darker the color - and the thinner the ellipse. - -\color inherit - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Wu2015_CorrMat_Data_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 0bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Wu2015_CorrMat_Errors_Spearman0.png - lyxscale 20 - width 32text% - BoundingBox 0bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:cmat-example" - -\end_inset - -Rank correlation matrices between (a) data sets and (b) errors sets for - case WU2015. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard - -\color teal -For the example showcased in Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:cmat-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, one sees that the datasets -\begin_inset Formula $C_{i}$ -\end_inset - - are all strongly positively correlated. - By contrast, the error sets -\begin_inset Formula $E_{i}$ -\end_inset - - present a more relaxed pattern, with weaker positive correlations, and - even a very small negative correlation for MP2 with all the other error - sets. - Having noticed this, one remarks that MP2 data present also smaller correlation - coefficients with other datasets, although this is barely visible on the - figure (the difference bears on the third digit of the correlation coefficients -). - In the following, one will present only correlation matrices for error - sets. -\end_layout - -\begin_layout Subsection -Statistics, their uncertainty and correlation -\begin_inset CommandInset label -LatexCommand label -name "subsec:Statistics,-their-uncertainty" - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Uncertainty. -\end_layout - -\begin_layout Standard -The value -\begin_inset Formula $s$ -\end_inset - - of a statistic -\begin_inset Formula $S$ -\end_inset - - (MSE, MUE, -\begin_inset Formula $Q_{95}$ -\end_inset - -...) estimated on an error set is generally uncertain, with uncertainty estimated - by its standard error -\begin_inset Formula $u(s)$ -\end_inset - -. - Two main uncertainty sources should be considered: (1) the limited size - -\begin_inset Formula $N$ -\end_inset - - of the reference data sample, and (2) the errors uncertainties, -\begin_inset Formula $u(e_{i})$ -\end_inset - - (Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Error-sets,-their" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - Unless the dataset is exhaustive ( -\emph on -e.g. 
-\emph default -, a dataset containing a property for a complete class of systems), the - first source is always present. - For experimental reference data, the second source is also always present, - but experimental uncertainties are rarely available for large datasets, - and a common practice seems to be to ignore them in the statistical analysis - (although they are often discussed to assess the quality of the dataset). - Some studies considered the effect of representative uncertainty levels - on benchmarking conclusions -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,DeWaele2016,Proppe2017" -literal "false" - -\end_inset - -. -\end_layout - -\begin_layout Standard -In Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Estimation-of-the" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, the impact of both uncertainty sources is illustrated on the mean value - (MSE), for which analytical formulae are available. - The strategy to handle reference data uncertainty depends on their distribution. - If the reference data uncertainties are uniform over the dataset, the hypothesi -s of -\emph on -i.i.d. - -\emph default - errors holds, and standard statistical procedures can be applied (unless - one is interested in quantifying specifically model errors -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,Proppe2017" -literal "false" - -\end_inset - -). - Otherwise, weighted statistics have to be used -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,Proppe2017" -literal "false" - -\end_inset - -, which will not be considered here. - Instead, we assume that datasets should not include data with extreme uncertain -ty values. 
-\end_layout - -\begin_layout Standard -Simple formulae for standard errors, such as those for the mean (a linear - statistic), are not available for non-linear statistics such as the MUE - or -\begin_inset Formula $Q_{95}$ -\end_inset - -. - Moreover, in order to avoid some of the limitations implied by such formulae - ( -\emph on -e.g. -\emph default -, normality hypothesis), one can use a general method to estimate the standard - error of any statistic: the bootstrap -\begin_inset CommandInset citation -LatexCommand cite -key "Efron1979,Efron1991,Hesterberg2015" -literal "false" - -\end_inset - -. - It is a Monte Carlo sampling method which consists in random draws with - replacement of -\begin_inset Formula $N'$ -\end_inset - - values from a dataset of size -\begin_inset Formula $N$ -\end_inset - -. - In the standard bootstrap, one uses -\begin_inset Formula $N'=N$ -\end_inset - -, -\emph on -i.e. -\emph default -, the generated samples have the same size as the original set. - The bootstrap has been shown to provide reliable estimations of uncertainties, - but the mean values unavoidably reflect the bias due to the original data - set -\begin_inset CommandInset citation -LatexCommand cite -key "Hesterberg2015" -literal "false" - -\end_inset - -. - In consequence, we estimate in the following the mean values from the original - sample and the uncertainties from the bootstrap samples. - The main limitation of the bootstrap is its hypothesis of -\begin_inset Formula $i.i.d.$ -\end_inset - - data, but it is consistent with our choice to avoid weighted statistics - and to avoid reference datasets with a large uncertainty range. - -\end_layout - -\begin_layout Paragraph -Correlation. 
-\end_layout - -\begin_layout Standard -The statistics covariance -\begin_inset Formula $\mathrm{cov}(s_{1},s_{2})$ -\end_inset - - derives from the mathematical expression of -\begin_inset Formula $S$ -\end_inset - - and from the variances and covariance of the error sets, -\begin_inset Formula $\mathrm{cov}(E_{1},E_{2})$ -\end_inset - -. - -\color red - -\color inherit -To estimate -\begin_inset Formula $\mathrm{cov}(s_{1},s_{2})$ -\end_inset - - in the case of a linear statistic, one can directly apply the generalization - of the combination of variances to several model outputs -\begin_inset CommandInset citation -LatexCommand cite -key "GUM-Supp2" -literal "false" - -\end_inset - -. - For the MSE, it is easy to demonstrate that the covariance is transferred - in totality: -\begin_inset Formula $\mathrm{cov}(\overline{e}_{1},\overline{e}_{2})=\mathrm{cov}(E_{1},E_{2})$ -\end_inset - -. - More generally, for linear statistics, -\begin_inset Formula $\mathrm{cov}(E_{1},E_{2})=0\Longrightarrow\mathrm{cov}(s_{1},s_{2})=0$ -\end_inset - -. - For non-linear statistics, such as the MUE or -\begin_inset Formula $Q_{95}$ -\end_inset - -, the combination of covariances is unsuitable, and one has to use Monte Carlo strategies. - -\end_layout - -\begin_layout Standard -To illustrate the transfer of correlation from error sets to non-linear - statistics, we performed a Monte Carlo study, detailed in Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Covariance-of-scores" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, with scenarii involving diverse distribution shapes. - A few global trends can be derived from this study, notably that for the - MUE and -\begin_inset Formula $Q_{95},$ -\end_inset - - -\color red - -\color inherit - -\begin_inset Formula $\mathrm{cor}(s_{1},s_{2})$ -\end_inset - - is a convex, positive function of -\begin_inset Formula $\mathrm{cor}(E_{1},E_{2})$ -\end_inset - -. 
- Moreover, for a given value of -\begin_inset Formula $\mathrm{cor}(E_{1},E_{2})$ -\end_inset - - one has -\begin_inset Formula $\mathrm{cor}(MUE_{1},MUE_{2})\ge\mathrm{cor}(Q_{95,1},Q_{95,2})$ -\end_inset - -. - As we explored only a fraction of the possible scenarii for the error - distributions, these trends cannot be considered as general. - The main point is that the correlation of error sets is at least partially - transferred to the derived statistics, a fact to be considered when comparing - the values of these statistics. -\end_layout - -\begin_layout Subsection -Pair-wise comparison of errors -\end_layout - -\begin_layout Standard -The systematic improvement probability (SIP) between two methods -\begin_inset Formula $M_{i}$ -\end_inset - - and -\begin_inset Formula $M_{j}$ -\end_inset - - is the proportion of systems in the reference set for which the absolute - error decreases when using -\begin_inset Formula $M_{i}$ -\end_inset - - instead of -\begin_inset Formula $M_{j}$ -\end_inset - -. - It is estimated as -\begin_inset Formula -\begin{align} -\mathrm{SIP}_{i,j} & =\frac{D_{i,j}}{N}\\ -D_{i,j} & =\sum_{k=1}^{N}\mathbf{1}_{\delta_{k}(M_{i},M_{j})<0} -\end{align} - -\end_inset - -where -\begin_inset Formula $\mathbf{1}_{X}$ -\end_inset - - is the indicator function, taking the value 1 if -\begin_inset Formula $X$ -\end_inset - - is true and 0 otherwise, and -\begin_inset Formula -\begin{equation} -\delta_{k}(M_{i},M_{j})=|e_{k}(M_{i})|-|e_{k}(M_{j})| -\end{equation} - -\end_inset - -Note that, because of the possible presence of ties, one has -\begin_inset Formula $\mathrm{SIP}_{i,j}+\mathrm{SIP}_{j,i}\le1$ -\end_inset - -. -\end_layout - -\begin_layout Paragraph -Interpretation. -\end_layout - -\begin_layout Standard -A line of the SIP matrix provides the SIP values for the corresponding - method over all the other ones. 
- If a new method -\begin_inset Formula $M_{1}$ -\end_inset - - provides systematic improvement over -\begin_inset Formula $M_{2}$ -\end_inset - -, in the sense that it has smaller absolute errors for all systems in the - reference set, one should have -\begin_inset Formula $\mathrm{SIP}_{1,2}=1$ -\end_inset - -. - Values smaller than 0.5 indicate a degradation. - Note however that -\begin_inset Formula $M_{1}$ -\end_inset - - can achieve small values of the SIP and still have better scores (MUE, - -\begin_inset Formula $Q_{95}$ -\end_inset - -), as a few large improvements might overwhelm many small degradations. - The interest of the SIP indicator is mainly to alert the user that using - a -\begin_inset Quotes eld -\end_inset - -better method -\begin_inset Quotes erd -\end_inset - - -\begin_inset Formula $M_{1}$ -\end_inset - - can lead to a degradation of results with respect to -\begin_inset Formula $M_{2}$ -\end_inset - -, with a probability approximately -\begin_inset Formula $(1-\mathrm{SIP}_{1,2})$ -\end_inset - -. - -\end_layout - -\begin_layout Paragraph -Mean SIP. -\end_layout - -\begin_layout Standard -In order to compare and rank a set of -\begin_inset Formula $K$ -\end_inset - - methods, one defines the Mean SIP ( -\family roman -\series medium -\shape up -\size normal -\emph off -\bar no -\strikeout off -\xout off -\uuline off -\uwave off -\noun off -\color none -MSIP -\family default -\series default -\shape default -\size default -\emph default -\bar default -\strikeout default -\xout default -\uuline default -\uwave default -\noun default -\color inherit -) as the mean value of a line of the SIP matrix (excluding the diagonal) -\begin_inset Formula -\begin{equation} -\mathrm{MSIP}(M_{i})=\frac{1}{K-1}\sum_{j=1}^{K}\mathrm{SIP}_{i,j}\,(1-\delta_{ij})\label{eq:MSIP} -\end{equation} - -\end_inset - -The largest MSIP value points to a method which on average provides the - best level of improvement over the other methods in the set. 
- Note that the MSIP is not transferable for comparisons with new methods - outside of its definition set. - -\end_layout - -\begin_layout Paragraph -Representation. -\end_layout - -\begin_layout Standard - -\color teal -In the same spirit as for correlation matrices, we represent SIP matrices - by a combination of color levels and disks. - Here, the color scale goes from blue (0.0) to red (1.0) with a white midpoint - (0.5), and the area of the disks is proportional to the SIP value. - The diagonal is left undefined to lighten the graph. - The matrix should be read by line: a line with a majority of red patches - signals a method with good SIP performances. - A contrario, a majority of blue patches on a line indicates a method with - poor SIP performances. - The methods are ordered by decreasing value of MSIP. - -\end_layout - -\begin_layout Standard -Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:SIPMAT-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - provides an example extracted from Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Pernot2018" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - It shows clearly that BH&HLYP is problematic -\color teal -(line of small blue disks) -\color inherit - for this dataset, and is systematically and strongly outperformed by all - other methods. - Conversely, the line for CAM-B3LYP is the only one to contain exclusively - values above 0.5 (reddish disks), albeit CAM-B3LYP does not achieve the - best MUE or -\begin_inset Formula $Q_{95}$ -\end_inset - - scores within this set of methods ( -\emph on -cf. - -\emph default - Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:pernot" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). 
- -\begin_inset Float figure -placement t -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Pernot2018_SIPHeatmap.png - lyxscale 25 - width 45text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:SIPMAT-example" - -\end_inset - - SIP matrix for a set of 9 methods compared on the G99 set of enthalpies - (case PER2018, Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Pernot2018" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - The area of a disk is proportional to the corresponding value. - The methods are ordered by decreasing value of MSIP (Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:MSIP" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Mean gain and loss. -\end_layout - -\begin_layout Standard -In order to appreciate the amplitude of the possible losses or gains when - switching between two methods, we define the mean gain (MG) as the mean - of the negative values of -\begin_inset Formula $\delta_{k}(M_{i},M_{j})$ -\end_inset - - -\begin_inset Formula -\begin{align} -\mathrm{MG}_{i,j} & =\frac{1}{D_{i,j}}\sum_{k=1}^{N}\mathbf{1}_{\delta_{k}(M_{i},M_{j})<0}\,\delta_{k}(M_{i},M_{j})\\ -\mathrm{ML}_{i,j} & =-\mathrm{MG}_{j,i} -\end{align} - -\end_inset - -where the mean loss (ML) is the opposite of the mean gain for the reciprocal - comparison. 
- These statistics are intended to convey an amplitude of the improvement - of -\begin_inset Formula $M_{i}$ -\end_inset - - over -\begin_inset Formula $M_{j}$ -\end_inset - -: MG is therefore a negative value (corresponding to a decrease of absolute - errors), and ML a positive value. - Moreover, the SIP and MG provide a decomposition of the MUE difference - between two methods: -\begin_inset Formula -\begin{align} -\Delta_{\mathrm{MUE}_{i,j}} & =\mathrm{MUE}(M_{i})-\mathrm{MUE}(M_{j})\\ - & =\mathrm{SIP}_{i,j}*\mathrm{MG}_{i,j}+\mathrm{SIP}_{j,i}*\mathrm{ML}_{i,j} -\end{align} - -\end_inset - -This shows that, except for methods pairs with an extreme SIP value, any - MUE difference is the balance between losses and gains distributed over - the systems. - One should not expect that a method with a smaller MUE will systematically - provide better results. - -\end_layout - -\begin_layout Paragraph -ECDF of -\begin_inset Formula $\delta_{k}(M_{i},M_{j})$ -\end_inset - -. -\end_layout - -\begin_layout Standard -The scores (SIP, MG and ML) can be visualized on a single graph of the Empirical - Cumulated Density Function (ECDF) of the differences of absolute errors - between two methods, as shown in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:Delta-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b). - This example is extracted from Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Borlido2019" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, on the prediction of band gaps. 
- It compares mBJ (MUE = 0.50 -\begin_inset space \thinspace{} -\end_inset - -eV) and LDA (MUE = 1.17 -\begin_inset space \thinspace{} -\end_inset - -eV), showing that the large MUE difference ( -\begin_inset Formula $\Delta_{\mathrm{MUE}}$ -\end_inset - -) between these methods is the balance of a mean gain -\begin_inset Formula $\mathrm{MG}=-0.86$ -\end_inset - - -\begin_inset space \thinspace{} -\end_inset - -eV for 85 -\begin_inset space \thinspace{} -\end_inset - -% of the systems (SIP), and a mean loss -\begin_inset Formula $\mathrm{ML}=0.37$ -\end_inset - - -\begin_inset space \thinspace{} -\end_inset - -eV for 15 -\begin_inset space \thinspace{} -\end_inset - -% of the systems. - In the hypothesis of a representative dataset, a user switching from LDA - to mBJ has to accept a 15 -\begin_inset space \thinspace{} -\end_inset - -% risk to see his results be degraded in average by 0.37 -\begin_inset space \thinspace{} -\end_inset - -eV, up to 1 -\begin_inset space \thinspace{} -\end_inset - -eV. - -\begin_inset Float figure -placement t -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Borlido2019_compareECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Borlido2019_deltaECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:Delta-example" - -\end_inset - - Statistics of absolute errors on band gaps for methods mBJ and LDA (case - BOR2019, -\emph on -cf. - -\emph default - Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Borlido2019" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) and their pair-wise differences: (a) ECDF of two error sets to be compared. 
- The MUE values are depicted by vertical dotted lines, and the -\begin_inset Formula $Q_{95}$ -\end_inset - - -\family roman -\series medium -\shape up -\size normal -\emph off -\bar no -\strikeout off -\xout off -\uuline off -\uwave off -\noun off -\color none -values -\family default -\series default -\shape default -\size default -\emph default -\bar default -\strikeout default -\xout default -\uuline default -\uwave default -\noun default -\color inherit - by vertical dashed lines; (b) ECDF of the difference of absolute errors. - The green- and red-shaded bands represent 95 -\begin_inset space \thinspace{} -\end_inset - -% confidence intervals for the reported statistics (SIP: systematic improvement - probability; MG: mean gain; ML: mean loss, -\begin_inset Formula $\Delta_{MUE}$ -\end_inset - -: MUE difference). - -\color teal -The orange bar represents an estimated level of uncertainty in the dataset. - It is a visual aid to evaluate the pertinence of the observed differences. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -Note that this information is not accessible when considering the ECDFs - of the absolute errors (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:Delta-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)). - For the chosen example, the comparison of these ECDFs might leave the false - impression that mBJ has consistently smaller absolute errors than LDA, - which is an artifact due to the ignorance of data pairing (correlation) - in this representation. 
- -\end_layout - -\begin_layout Subsection -Pair-wise comparison of statistics -\end_layout - -\begin_layout Subsubsection -The testing framework -\end_layout - -\begin_layout Standard -Using the error sets for two methods -\begin_inset Formula $M_{1}$ -\end_inset - - and -\begin_inset Formula $M_{2}$ -\end_inset - -, one calculates the values -\begin_inset Formula $s_{1}=S(E_{1})$ -\end_inset - - and -\begin_inset Formula $s_{2}=S(E_{2})$ -\end_inset - - of a statistic -\begin_inset Formula $S$ -\end_inset - -. - The usual procedure to compare these two values is to test if their difference - is significantly larger than their combined uncertainty, -\emph on -i.e. -\emph default - -\begin_inset Formula -\begin{equation} -|s_{1}-s_{2}|>\kappa\thinspace u(s_{1}-s_{2})\label{eq:compare} -\end{equation} - -\end_inset - -where -\begin_inset Formula $u(s_{1}-s_{2})$ -\end_inset - - is the uncertainty on the difference, and -\begin_inset Formula $\kappa$ -\end_inset - - is an enlargement factor typically taken as -\begin_inset Formula $\kappa=2$ -\end_inset - - (or 1.96) in metrology -\begin_inset CommandInset citation -LatexCommand citep -key "Kacker2010" -literal "false" - -\end_inset - -. - In the hypothesis of a normal distribution for the statistics difference, - -\begin_inset Formula $\kappa=1.96$ -\end_inset - - corresponds to a confidence level of 95 -\begin_inset space \thinspace{} -\end_inset - -% for a two-sided test, implied by the absolute value in Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:compare" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - -\color teal -If one has evidence that the distribution of differences is not normal, - -\begin_inset Formula $\kappa$ -\end_inset - - has to be chosen as the uncertainty enlargement factor providing a 95 -\begin_inset space \thinspace{} -\end_inset - -% confidence interval for this distribution. 
- If the test is positive, there is less than 5 -\begin_inset space \thinspace{} -\end_inset - -% probability that the difference between -\begin_inset Formula $s_{1}$ -\end_inset - - and -\begin_inset Formula $s_{2}$ -\end_inset - - is due to sampling effects. -\end_layout - -\begin_layout Standard -Assuming that -\begin_inset Formula $u(s_{1}-s_{2})$ -\end_inset - - cannot be null nor infinite, it is convenient to recast the test by using - a discrepancy factor -\begin_inset Formula -\begin{equation} -\xi(s_{1},s_{2})=\frac{|s_{1}-s_{2}|}{u(s_{1}-s_{2})}\label{eq:discFac-1} -\end{equation} - -\end_inset - -to be compared to the threshold -\begin_inset Formula $\kappa$ -\end_inset - -. - -\color teal -A probability value ( -\begin_inset Formula $p$ -\end_inset - --value) corresponding to -\begin_inset Formula $\xi$ -\end_inset - - is derived from the cumulated density function of the expected distribution - for -\begin_inset Formula $\xi$ -\end_inset - -. - For instance -\color inherit - -\begin_inset Formula -\begin{align} -p_{t} & =1-\Phi_{H}(\xi)\label{eq:pt-1}\\ - & =2*\left(1-\Phi(\xi)\right)\label{eq:pt} -\end{align} - -\end_inset - -where -\begin_inset Formula $\Phi_{H}(.)$ -\end_inset - - is the cumulative distribution function (CDF) of the standard half-normal - distribution -\begin_inset CommandInset citation -LatexCommand cite -key "Leone1961" -literal "false" - -\end_inset - -, and -\begin_inset Formula $\Phi(.)$ -\end_inset - - is the CDF of the standard normal distribution. - The half-normal distribution is used to account for the absolute value - in Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:discFac-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. 
- The -\begin_inset Formula $t$ -\end_inset - - index of -\begin_inset Formula $p_{t}$ -\end_inset - - refers here to the analogy with the two-sample -\begin_inset Formula $t$ -\end_inset - --test for equal means -\begin_inset CommandInset citation -LatexCommand cite -key "Snedecor1989" -literal "false" - -\end_inset - -. - For testing, the probability threshold corresponding to -\begin_inset Formula $P(\xi>\kappa=1.96)$ -\end_inset - - is -\begin_inset Formula $0.05$ -\end_inset - -. - -\color teal -For -\begin_inset Formula $p_{t}$ -\end_inset - - above this value, one cannot reject the hypothesis that the observed difference - between two values is due to chance. - -\color inherit - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -To overpass the normality hypothesis, one needs to characterize the CDF - of -\begin_inset Formula $\xi$ -\end_inset - -. - As for a given dataset one has single values for -\begin_inset Formula $s_{1}$ -\end_inset - - and -\begin_inset Formula $s_{2}$ -\end_inset - -, this requires to generate alternative datasets by some sampling strategy - (Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Bootstrap-based-comparison-of" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -In order to be able to estimate -\begin_inset Formula $p_{t}$ -\end_inset - -, one needs to evaluate the uncertainty on the difference of -\begin_inset Formula $s_{1}$ -\end_inset - - and -\begin_inset Formula $s_{2}$ -\end_inset - -. 
- Formally, it can be obtained by the combination of variances -\begin_inset CommandInset citation -LatexCommand cite -key "GUM" -literal "false" - -\end_inset - - -\begin_inset Formula -\begin{equation} -u(s_{1}-s_{2})=\sqrt{u^{2}(s_{1})+u^{2}(s_{2})-2\mathrm{cov}(s_{1},s_{2})}\label{eq:u-diff-stat} -\end{equation} - -\end_inset - -The usefulness of this formula depends on several assumptions (theoretical - limits of the statistics not within a high probability interval around - their values, symmetry of error intervals... - -\begin_inset CommandInset citation -LatexCommand cite -key "Nicholls2014,Nicholls2016" -literal "false" - -\end_inset - -). - Nevertheless, it shows that the covariance between statistics can have - a major effect on the amplitude of -\begin_inset Formula $u(s_{1}-s_{2})$ -\end_inset - -. - In the limit of very strong positive correlation, the uncertainty on the - difference can become very small, impacting -\begin_inset Formula $\xi(s_{1},s_{2})$ -\end_inset - - and -\begin_inset Formula $p_{t}$ -\end_inset - -. 
-\end_layout - -\begin_layout Standard -To estimate the effect of correlation on the comparison of scores, we introduce - a variant -\begin_inset Formula $p_{unc}$ -\end_inset - - (uncorrelated) of -\begin_inset Formula $p_{t}$ -\end_inset - -, based on a version of the discrepancy ignoring correlation -\begin_inset Formula -\begin{align} -\xi_{unc}(s_{1},s_{2}) & =\frac{|s_{1}-s_{2}|}{\sqrt{u(s_{1})^{2}+u(s_{2})^{2}}}\label{eq:discFac-1-1}\\ -p_{unc} & =2*\left(1-\Phi(\xi_{unc})\right) -\end{align} - -\end_inset - -In the hypothesis of mostly positive covariances for the statistics of - interest (MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -; Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Statistics,-their-uncertainty" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), -\begin_inset Formula $p_{unc}$ -\end_inset - - is expected to overestimate -\begin_inset Formula $p_{t}$ -\end_inset - -. -\end_layout - -\begin_layout Subsubsection -Bootstrap-based comparison of statistics -\begin_inset CommandInset label -LatexCommand label -name "subsec:Bootstrap-based-comparison-of" - -\end_inset - - -\end_layout - -\begin_layout Standard -Several strategies can be considered to compare pairs of statistics -\begin_inset Formula $(s_{1},s_{2})$ -\end_inset - - through a -\begin_inset Formula $p$ -\end_inset - --value. -\end_layout - -\begin_layout Paragraph - -\series bold -Estimate -\begin_inset Formula $u(s_{1})$ -\end_inset - -, -\begin_inset Formula $u(s_{2})$ -\end_inset - - and -\begin_inset Formula $\mathrm{cov}(s_{1},s_{2})$ -\end_inset - - -\series default -. - -\end_layout - -\begin_layout Standard -The uncertainty on the statistics of interest (except for the MSE and RMSD) - and their covariance are not, to our knowledge, available in analytical - form. 
- In consequence, one has to use a numerical procedure, such as the bootstrap - to estimate them -\begin_inset CommandInset citation -LatexCommand cite -key "Efron1979,Hesterberg2015" -literal "false" - -\end_inset - -. - The application of the bootstrap to individual terms of Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:u-diff-stat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - will result in an accumulation of statistical uncertainties. - Besides, the estimation of covariances is very sensitive to outliers. - This approach is clearly suboptimal and is not recommended. -\end_layout - -\begin_layout Paragraph - -\series bold -Estimate directly -\begin_inset Formula $u(s_{1}-s_{2})$ -\end_inset - - -\series default -. - -\end_layout - -\begin_layout Standard -A better approach in the present context is to estimate directly (by bootstrap) - the uncertainty on the difference of scores. - This relieves some distributional hypotheses in Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:u-diff-stat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, and enables the explicit correlation of samples of -\begin_inset Formula $s_{1}$ -\end_inset - - and -\begin_inset Formula $s_{2}$ -\end_inset - - through paired-data sampling. - However, estimating a discrepancy factor leads us to use Eq. -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:pt" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - to estimate the -\begin_inset Formula $p$ -\end_inset - --value, with the associated normality hypothesis. 
- -\begin_inset Float algorithm -placement !t -wide false -sideways false -status open - -\begin_layout Plain Layout -Input: Two paired error sets -\begin_inset Formula $E_{1}$ -\end_inset - -, -\begin_inset Formula $E_{2}$ -\end_inset - - of size -\begin_inset Formula $N$ -\end_inset - -, a statistic estimator -\begin_inset Formula $S$ -\end_inset - -, and a number of bootstrap samples -\begin_inset Formula $B$ -\end_inset - - -\end_layout - -\begin_layout Enumerate -Bootstrap the statistics difference -\end_layout - -\begin_deeper -\begin_layout Enumerate -For -\begin_inset Formula $j=1:B$ -\end_inset - - -\end_layout - -\begin_deeper -\begin_layout Enumerate -Generate a -\begin_inset Formula $N$ -\end_inset - --sample of paired data with replacement -\begin_inset Formula $\longrightarrow\left(E_{1}^{*},E_{2}^{*}\right)$ -\end_inset - - -\end_layout - -\begin_layout Enumerate -Estimate -\begin_inset Formula $d_{j}=S(E_{1}^{*})-S(E_{2}^{*})$ -\end_inset - - -\end_layout - -\end_deeper -\end_deeper -\begin_layout Enumerate -Calculate a generalized -\begin_inset Formula $p$ -\end_inset - --value to test -\begin_inset Formula $S(E_{1})=S(E_{2})$ -\end_inset - - -\end_layout - -\begin_deeper -\begin_layout Plain Layout -\begin_inset Formula $p_{g}=2\min(p^{*},1-p^{*})$ -\end_inset - -, where -\begin_inset Newline newline -\end_inset - - -\begin_inset Formula $p^{*}=(A+0.5C)/B$ -\end_inset - - -\begin_inset Newline newline -\end_inset - - -\begin_inset Formula $A=\sum_{i=1}^{B}1_{d_{i}<0}$ -\end_inset - - -\begin_inset Newline newline -\end_inset - - -\begin_inset Formula $C=\sum_{i=1}^{B}1_{d_{i}=0}$ -\end_inset - - -\end_layout - -\end_deeper -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "alg:methodM" - -\end_inset - -Method M -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - -: testing the equality of a 
statistic -\begin_inset Formula $S$ -\end_inset - - for two paired samples by bootstrap and a generalized -\begin_inset Formula $p$ -\end_inset - --value ( -\begin_inset Formula $p_{g}$ -\end_inset - -). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph - -\series bold -Generalized -\begin_inset Formula $p$ -\end_inset - --value -\series default -. - -\end_layout - -\begin_layout Standard -The use of the generalized -\begin_inset Formula $p$ -\end_inset - --value ( -\begin_inset Formula $p_{g}$ -\end_inset - -), as proposed by Wilcox and Erceg-Hurn -\begin_inset CommandInset citation -LatexCommand cite -key "Liu1997,Wilcox2012" -literal "false" - -\end_inset - - (method M; -\emph on -cf. - -\emph default - Algorithm -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:methodM" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), conveniently avoids estimating -\begin_inset Formula $u(s_{1}-s_{2})$ -\end_inset - -, and the associated normality hypothesis of -\begin_inset Formula $p_{t}$ -\end_inset - -. - It is based on a simple counting of null and negative bootstrapped differences - of statistics with paired samples. - -\color teal -If -\begin_inset Formula $S(E_{1})=S(E_{2})$ -\end_inset - -, one expects that the bootstrap sample will generate positive and negative - values of their difference in equal amounts. - In this case, -\begin_inset Formula $p^{*}\simeq1-p^{*}\simeq0.5$ -\end_inset - - and -\begin_inset Formula $p_{g}$ -\end_inset - - is close to 1. - Note that the null values in the sample are shared equally between the - positive and negative values. - Conversely, if there is a small proportion -\begin_inset Formula $p^{*}$ -\end_inset - - of negative values, the mean of the sample is positive, different from - zero. 
- The smaller -\begin_inset Formula $p^{*}$ -\end_inset - - the farther the mean from zero, and the lower the probability of the null, - -\begin_inset Formula $S(E_{1})=S(E_{2})$ -\end_inset - - hypothesis. - The symmetric case occurs for large values of -\begin_inset Formula $p^{*}$ -\end_inset - - (small values of -\begin_inset Formula $1-p^{*}$ -\end_inset - -). - As one does not care about the sign of the difference, a factor of two is applied - to estimate -\begin_inset Formula $p_{g}$ -\end_inset - -. - The identity of this algorithm with the analytical -\begin_inset Formula $p$ -\end_inset - --value for the comparison of normal sample means is established in Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Estimation-of--values" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. -\end_layout - -\begin_layout Standard -Note that the use of paired samples is essential to capture inter-statistics - correlations. - Wilcox and Erceg-Hurn -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - - have shown that their 'method M' provides a well controlled level of type - I errors for the comparison of quantiles at the 0.05 level. - They estimated that dataset sizes of -\begin_inset Formula $N\ge30$ -\end_inset - - are necessary when comparing quantiles up to 0.9. - Using the same protocol, we estimated that for the comparison of -\begin_inset Formula $Q_{95}$ -\end_inset - - values at the same 0.05 level, -\begin_inset Formula $N\ge60$ -\end_inset - - is required. - Details are presented in Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Type-I-error" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. 
- -\color orange - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout - -\color orange -In Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Examples" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, we show through simulation how the third strategy improves over the second - one. - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsubsection -Rank inversion probability -\begin_inset Formula $P_{inv}$ -\end_inset - - -\begin_inset CommandInset label -LatexCommand label -name "subsec:Rank-inversion-probability" - -\end_inset - - -\end_layout - -\begin_layout Standard -In a previous article -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018,Pernot2019" -literal "false" - -\end_inset - -, we defined a probability for ranking inversion -\begin_inset Formula $P_{inv}=P(S_{1}<S_{2}|s_{1}>s_{2})$ -\end_inset - -, based on the normal distribution. -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Our goal was to estimate the probability that a given ranking results from - a limited benchmark dataset. - -\end_layout - -\end_inset - - Using the present notations, -\begin_inset Formula $P_{inv}$ -\end_inset - - can be reformulated as -\begin_inset Formula -\begin{align} -P_{inv} & =\Phi(0,\mu=s_{1}-s_{2},\sigma=\sqrt{u^{2}(s_{1})+u^{2}(s_{2})})\\ - & =\Phi(0,\mu=\xi_{unc})\\ - & =\Phi(-\xi_{unc})\\ - & =1-\Phi(\xi_{unc})\\ - & =p_{unc}\thinspace/\thinspace2 -\end{align} - -\end_inset - -This shows the limitations of our previous definition of -\begin_inset Formula $P_{inv}$ -\end_inset - -, -\emph on -i.e. -\emph default -, the normality hypothesis and the neglect of error set correlations. - Using the same difference statistics as for -\begin_inset Formula $p_{g}$ -\end_inset - - (Algo. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:methodM" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), one can generalize -\begin_inset Formula $P_{inv}$ -\end_inset - - as the probability to have differences in the bootstrap sample with a sign - opposite to the reference one ( -\begin_inset Formula $\mathrm{sign}(s_{1}-s_{2})$ -\end_inset - -), -\emph on -i.e. -\emph default -, -\begin_inset Formula -\begin{align} -P_{inv} & =\frac{1}{B}\left(\sum_{i=1}^{B}1_{\mathrm{sign}(d_{i})\ne\mathrm{sign}(s_{1}-s_{2})}-\sum_{i=1}^{B}1_{d_{i}=0}\right)\label{eq:pinv-new} -\end{align} - -\end_inset - -where -\begin_inset Formula $B$ -\end_inset - - is the number of bootstrap samples and the null differences (with sign - 0) are excluded from the count. - -\end_layout - -\begin_layout Standard -If one takes as reference -\begin_inset Formula $s_{2}$ -\end_inset - - the smallest value -\begin_inset Formula $\hat{s}$ -\end_inset - - of a statistic within a set of methods, one gets -\begin_inset Formula -\begin{align} -P_{inv} & =\frac{1}{B}\left(\sum_{i=1}^{B}1_{\mathrm{sign}(d_{i})\ne\mathrm{sign}(s_{1}-\hat{s})}-\sum_{i=1}^{B}1_{d_{i}=0}\right)\\ - & =\frac{1}{B}\sum_{i=1}^{B}1_{d_{i}<0}\\ - & \simeq p_{g}\thinspace/\thinspace2\label{eq:pinv-vs-pg} -\end{align} - -\end_inset - -where the relation to -\begin_inset Formula $p_{g}$ -\end_inset - - assumes a negligible probability to have null statistics differences and - exploits the fact that -\begin_inset Formula $\sum_{i=1}^{B}1_{d_{i}<0}<\sum_{i=1}^{B}1_{d_{i}>0}$ -\end_inset - - for our choice of reference -\begin_inset Formula $\hat{s}$ -\end_inset - -. 
- -\end_layout - -\begin_layout Subsubsection -Ranking probability matrix -\begin_inset Formula $\mathbf{P}_{r}$ -\end_inset - - -\begin_inset CommandInset label -LatexCommand label -name "subsec:Ranking-probability-matrix" - -\end_inset - - -\end_layout - -\begin_layout Standard -A measure of the reliability of a statistic-based ranking can be estimated - by bootstrap -\begin_inset CommandInset citation -LatexCommand cite -key "Hall2009" -literal "false" - -\end_inset - -. - This approach has notably been used by Proppe and Reiher -\begin_inset CommandInset citation -LatexCommand cite -key "Proppe2017" -literal "false" - -\end_inset - - to study how the sample size affects the probability for a DFA to be ranked - first on the basis of its prediction uncertainty. - We apply it here to compute, for a set of -\begin_inset Formula $K$ -\end_inset - - methods scored by a statistic -\begin_inset Formula $S$ -\end_inset - -, a ranking probability matrix -\begin_inset Formula $\mathbf{P}_{r}$ -\end_inset - - giving, for each method, its probability to have any rank -\begin_inset Formula -\begin{equation} -P_{r,jk}=P(\mathrm{rank}(S_{j})=k);\thinspace j,k=1,\ldots,K -\end{equation} - -\end_inset - -The algorithm to generate this matrix is described in Algo. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:bs-rank" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. 
- -\begin_inset Float algorithm -placement t -wide false -sideways false -status open - -\begin_layout Plain Layout -Input: -\begin_inset Formula $K$ -\end_inset - - paired error sets, -\begin_inset Formula $E_{1},\ldots,E_{K}$ -\end_inset - - of size -\begin_inset Formula $N$ -\end_inset - -, a statistic estimator -\begin_inset Formula $S$ -\end_inset - -, and a number of bootstrap samples -\begin_inset Formula $B$ -\end_inset - - -\end_layout - -\begin_layout Enumerate -Bootstrap the ranks -\end_layout - -\begin_deeper -\begin_layout Enumerate -For -\begin_inset Formula $i=1:B$ -\end_inset - - -\end_layout - -\begin_deeper -\begin_layout Enumerate -Generate a -\begin_inset Formula $N$ -\end_inset - --sample of paired data with replacement -\begin_inset Formula $\longrightarrow\left(E_{1}^{*},\ldots,E_{K}^{*}\right)$ -\end_inset - - -\end_layout - -\begin_layout Enumerate -Estimate the statistics vector -\begin_inset Formula $S^{*}=\left(S(E_{1}^{*}),\ldots,S(E_{K}^{*})\right)$ -\end_inset - - -\end_layout - -\begin_layout Enumerate -Estimate the ranks by increasing order of -\begin_inset Formula $S^{*}$ -\end_inset - -: -\begin_inset Formula $O_{i}^{*}=\mathrm{order}(S^{*})$ -\end_inset - -, -\begin_inset Newline newline -\end_inset - -where -\begin_inset Formula $O_{i}^{*}$ -\end_inset - - is a -\begin_inset Formula $K$ -\end_inset - --vector of integer values. - -\end_layout - -\end_deeper -\end_deeper -\begin_layout Enumerate -Estimate for each method its probability to have any rank -\end_layout - -\begin_deeper -\begin_layout Plain Layout -\begin_inset Formula -\[ -P_{r,jk}=\frac{1}{B}\sum_{i=1}^{B}1_{O_{ij}^{*}=k} -\] - -\end_inset - - -\end_layout - -\end_deeper -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "alg:bs-rank" - -\end_inset - -Estimating the rank probabilities for a set of methods. 
-\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Representations. -\end_layout - -\begin_layout Standard -Two representations for this matrix are used by Hall and Miller -\begin_inset CommandInset citation -LatexCommand cite -key "Hall2009" -literal "false" - -\end_inset - -, either a levels image (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:bsRank" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)), or a summary by probability intervals (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:bsRank" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b)). - In the following, we will use mostly the levels image representation which - we find easier to read and interpret. - -\begin_inset Float figure -placement t -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Pernot2018_figRanks_mue_levels.png - lyxscale 25 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Pernot2018_figRanks_mue_ci.png - lyxscale 25 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:bsRank" - -\end_inset - -Graphical representations of sample-size effect on MUE ranking: (left) levels - image of the ranking probability matrix; (right) summary of the ranking - probability matrix by the modes (diamonds) and 90 -\begin_inset space \thinspace{} -\end_inset - -% probability intervals. 
- The data are taken from the case PER2018 (Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Pernot2018" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - Both representations indicate a possible ranking inversion between B97-1, - CAM-B3LYP and PBE0, -\emph on -i.e. -\emph default -, the reference ranking based on the MUE is not certain for this trio. - Similar problems occur within two other groups, notably BLYP and PW86PBE. - The ranks of PBE (8) and BH&HLYP (9) seem well established. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -A summary in results tables can also be considered, by reporting for each - method its mode in ranking probability and the corresponding probability, - which indicates the strength of this rank. - These tools make it easy to appreciate the robustness of the reference - ranking. -\end_layout - -\begin_layout Paragraph -Remarks. -\end_layout - -\begin_layout Itemize -As discussed by Hall and Miller -\begin_inset CommandInset citation -LatexCommand cite -key "Hall2009" -literal "false" - -\end_inset - -, the standard bootstrap used in the present article ( -\begin_inset Formula $N$ -\end_inset - --out -\begin_inset space \thinspace{} -\end_inset - -of- -\begin_inset Formula $N$ -\end_inset - - sampling) tends to underestimate the dispersion of the ranks. 
- Better estimates would be obtained by a -\begin_inset Formula $N'$ -\end_inset - --out -\begin_inset space \thinspace{} -\end_inset - -of- -\begin_inset Formula $N$ -\end_inset - - sampling ( -\begin_inset Formula $N' - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Code -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Property -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $N$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $K$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Source -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PER2018 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Intensive atomization energies -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -222 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -9 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018,Pernot2019" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -BOR2019 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Band gaps -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -471 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -15 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand citep -key "Borlido2019" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -NAR2019 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain 
Layout -Enthalpies of formation -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -469 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand cite -key "Narayanan2019" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -CAL2019 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -London Dispersion Corrections -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -41 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3*10 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand cite -key "Caldeweyher2019" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -JEN2018 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Non-covalent interaction energies -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -66 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -6 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand cite -key "Jensen2018" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -DAS2019 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Dielectric Constants -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -23 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -6 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation 
-LatexCommand cite -key "Das2019" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -THA2015 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Polarizability -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -135 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -7 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand cite -key "Thakkar2015" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -WU2015 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Polarizability -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -145 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -34 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand cite -key "Wu2015b" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -ZAS2019 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Effective atomization energies -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -6211 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand cite -key "Zaspel2019" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:Case-studies" - -\end_inset - -Case studies: -\begin_inset 
Formula $N$ -\end_inset - - is the number of systems in the dataset and -\begin_inset Formula $K$ -\end_inset - - is the number of compared methods. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -PER2018 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Pernot2018" - -\end_inset - - -\end_layout - -\begin_layout Standard -We consider here the intensive atomization energies -\begin_inset CommandInset citation -LatexCommand cite -key "Perdew2016" -literal "false" - -\end_inset - - estimated with 9 DFAs on the G3/99 dataset -\begin_inset CommandInset citation -LatexCommand cite -key "Curtiss2000" -literal "false" - -\end_inset - -, and extracted from the article by Pernot and Savin -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018,Pernot2019" -literal "false" - -\end_inset - -. - This medium-sized dataset ( -\begin_inset Formula $N=222$ -\end_inset - -) presents several non-normal error distributions, and was used to illustrate - the interest for benchmarks of using -\begin_inset Formula $Q_{95}$ -\end_inset - - as a complement to the MUE, and to illustrate our former definition of - -\begin_inset Formula $P_{inv}$ -\end_inset - -. - Here we focus on the correlations and their impact on the comparison of - statistics. -\end_layout - -\begin_layout Paragraph -Correlations. -\end_layout - -\begin_layout Standard -The correlation matrices between the error sets and their statistics are - represented in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:pernot1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, along with histograms of correlation coefficients of their non-diagonal - elements. - The error sets are all positively correlated, with a wide distribution, - except for pairs involving BH&HLYP which presents negative correlations - with four other methods. 
- When considering the scores, all correlations are positive or null. - Globally, the correlations are weaker for -\begin_inset Formula $Q_{95}$ -\end_inset - - than for the MUE, except for a few pairs. - The maximum of the histograms shifts from 0.6 for MUE to 0 for -\begin_inset Formula $Q_{95}$ -\end_inset - -, but large correlation values are nevertheless still observed for -\begin_inset Formula $Q_{95}$ -\end_inset - -. - These observations confirm the main trends from the numerical study of - correlation transfer in Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Covariance-of-scores" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - -\begin_inset Float figure -placement !t -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Pernot2018_CorrMat_Errors_Spearman.png - lyxscale 20 - width 32col% - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Pernot2018_CorrMat_MUE_Spearman.png - lyxscale 20 - width 32col% - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Pernot2018_CorrMat_Q95_Spearman.png - lyxscale 20 - width 32col% - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Pernot2018_HistCorrs.png - lyxscale 20 - width 99col% - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - 
-\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:pernot1" - -\end_inset - - Case PER2018 - correlations: (top) rank correlation matrices between Errors - sets, MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - ; (bottom) histogram of non-diagonal elements of the corresponding correlation - matrices. - The methods are ordered by a clustering algorithm of the errors correlation - matrix ( -\family typewriter -hclust -\family default - -\begin_inset CommandInset citation -LatexCommand cite -key "CiteR" -literal "false" - -\end_inset - -) -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Statistics. -\end_layout - -\begin_layout Standard -\begin_inset Float table -placement p -wide false -sideways true -status open - -\begin_layout Plain Layout -\align center - -\size small -\begin_inset Tabular - - - - - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $p_{unc}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $p_{g}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $p_{unc}$ -\end_inset - - -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $p_{g}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout 
-kcal/mol -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -B3LYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.18(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4.5(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.57(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.53(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.05(10) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.48(5) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -B97-1 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.85(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain 
Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -2.7(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.61(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -BH&HLYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4.8(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -11.7(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.06(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.95(2) 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --4.3(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.8(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -BLYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.6(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -5.3(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.43(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.77(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.2(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.6(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -CAM-B3LYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.90(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.64 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout 
Plain Layout -0.57 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.29 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4.1(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.74(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.33(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.3(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.59(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -LC- -\begin_inset Formula $\omega$ -\end_inset - -PBE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.09(10) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.03 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4.3(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.01 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.65(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.43(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.1(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.44(3) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.8(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -8.1(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.30(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.81(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --2.6(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.8(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.92(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.44 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.24 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.12 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.3(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.33 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.02 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.01 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.66(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.50(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.74(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.61(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PW86PBE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.6(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -6.1(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.49(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.59(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.6(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.43(6) -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:pernot" - -\end_inset - -Case PER2018: Absolute error statistics, -\begin_inset Formula $p$ -\end_inset - --values and inversion probabilities and SIP statistics for comparisons with - respect to the DFA with the smallest MUE (B97-1), the reported SIP values - correspond to the B97-1 line of the SIP matrix. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - -The statistics are reported in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:pernot" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - Note that, due to the use of a different quantile estimation algorithm, - the values of -\begin_inset Formula $Q_{95}$ -\end_inset - - have changed slightly from the values reported in Ref. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - -. - -\end_layout - -\begin_layout Standard -There is a group of three methods (B97-1, CAM-B3LYP and PBE0) with small - MUE values. 
- Considering the -\begin_inset Formula $p_{g}$ -\end_inset - - values, one cannot reject the hypothesis that the observed differences - are due to chance. - Note that the same conclusion would have been reached when ignoring correlation - ( -\begin_inset Formula $p_{unc}$ -\end_inset - -), as the neglect of correlation increases the -\begin_inset Formula $p$ -\end_inset - --values, but no other one reaches the 0.05 threshold. - However, the -\begin_inset Formula $p_{unc}$ -\end_inset - - value for LC- -\begin_inset Formula $\omega$ -\end_inset - -PBE reaches 0.03, not far from the threshold. - Consistently, the MUE inversion probability -\begin_inset Formula $P_{inv}$ -\end_inset - - computed in the reference article -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2019" -literal "false" - -\end_inset - -, included LC- -\begin_inset Formula $\omega$ -\end_inset - -PBE in the group of methods with a sizable risk of inversion. - As demonstrated in Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:pinv-vs-pg" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, the revised version of -\begin_inset Formula $P_{inv}$ -\end_inset - - accounting for correlations is now practically equal to -\begin_inset Formula $p_{g}/2$ -\end_inset - -, which rejects LC- -\begin_inset Formula $\omega$ -\end_inset - -PBE as a contender for the head group. - When picking B97-1 instead of CAM-B3LYP based on the MUE, there is a 29 -\begin_inset space \thinspace{} -\end_inset - -% chance of being wrong, -\emph on -i.e. -\emph default -, that the MUE of CAM-B3LYP is indeed smaller than that of B97-1 due to the restricted - sample size. - This risk falls to 12 -\begin_inset space \thinspace{} -\end_inset - -% for PBE0. 
-\end_layout - -\begin_layout Standard -The situation is different for -\begin_inset Formula $Q_{95}$ -\end_inset - -, where the neglect of correlation would lead to the conclusion that PBE0 - (3.3(5) -\begin_inset space \thinspace{} -\end_inset - -kcal/mol) is not significantly distinct from B97-1 (2.7(4) -\begin_inset space \thinspace{} -\end_inset - -kcal/mol; -\begin_inset Formula $p_{unc}=0.33$ -\end_inset - -) whereas the correct value is given by -\begin_inset Formula $p_{g}=0.02$ -\end_inset - -. - In this example, -\begin_inset Formula $Q_{95}$ -\end_inset - - can help us to rank the three best methods, for which the MUE is not discrimina -nt. - This is linked to the presence of different tails in the absolute error - distributions (cf. - Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:pernot1-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). -\end_layout - -\begin_layout Standard -This example made it possible to illustrate and confirm the relations between -\begin_inset Formula $p_{unc}$ -\end_inset - -, -\begin_inset Formula $p_{g}$ -\end_inset - - and -\begin_inset Formula $P_{inv}$ -\end_inset - - expressed in Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Rank-inversion-probability" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - In the following examples, only -\begin_inset Formula $P_{inv}$ -\end_inset - - will be reported. -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -The SIP analysis brings another view on the head trio (B97-1, CAM-B3LYP - and PBE0), as the method with the highest MSIP is CAM-B3LYP. - One can see in the SIP matrix in Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:SIPMAT-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, that indeed, the row for CAM-B3LYP is fully reddish, whereas those for B97-1 - and PBE0 also present blue and white patches. -\end_layout - -\begin_layout Standard -The ECDF of the difference of absolute errors for CAM-B3LYP and B97-1 helps - to understand the contradiction between the MUE and MSIP ranks (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:pernot1-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b)). - The MUE difference for this pair is statistically not significant ( -\begin_inset Formula $p_{g}=0.57$ -\end_inset - -), the SIP value is -\begin_inset Formula $1-0.33=0.67$ -\end_inset - - – a small improvement of CAM-B3LYP over B97-1 – the mean gain -0.6 kcal/mol - and the mean loss -1.3 kcal/mol, due to the heavy tail in the CAM-B3LYP - error distribution. - So by switching from B97-1 to CAM-B3LYP, one has to accept a 35 -\begin_inset space \thinspace{} -\end_inset - -% risk of degrading the intensive atomization energies by 1.3 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol on average and up to 4 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol. - The same comparison between CAM-B3LYP and PBE0 (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:pernot1-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(c)) shows that there is no strong basis to favor either method. 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Pernot2018_compareECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Pernot2018_deltaECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Pernot2018_deltaECDF2.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:pernot1-1" - -\end_inset - -Case PER2018 - absolute errors statistics: (a) ECDF and statistics of absolute - errors; (b-c) ECDF and statistics of the difference of absolute errors. - See Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:Delta-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - for details. - The light orange band depicts the chemical accuracy (1 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Ranking. -\end_layout - -\begin_layout Standard -The ranking probability matrices (Figs -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:bsRank" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - and -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:pernot2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) confirm the analysis. - The group of three methods (B97-1, CAM-B3LYP and PBE0) at the top of the - MUE ranking presents a blurred image (no clear diagonal), whereas the first - -\begin_inset Formula $Q_{95}$ -\end_inset - - rank of B97-1 is not ambiguous. - As expected, the MSIP ranking favors solidly CAM-B3LYP. 
- Globally, B97-1 should be preferred to minimize the risk of large errors, - where CAM-B3LYP would provide overall smaller absolute errors. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Pernot2018_figRanks_q95hd_levels.png - lyxscale 25 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Pernot2018_figRanks_msip_levels.png - lyxscale 25 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:pernot2" - -\end_inset - -Case PER2018: levels representation of the ranking probability matrix for - -\begin_inset Formula $Q_{95}$ -\end_inset - - and MSIP. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset Newpage clearpage -\end_inset - - -\end_layout - -\begin_layout Subsection -\noindent -BOR2019 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Borlido2019" - -\end_inset - - -\end_layout - -\begin_layout Standard -Band gap estimations for a set of 471 systems -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -The original dataset contains 472 systems, but several values are missing - for -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -ce{NaYbP2S6} -\end_layout - -\end_inset - -, which was excluded. -\end_layout - -\end_inset - - by 15 DFAs were extracted from the Supplementary Information of a recent - article by Borlido -\emph on -et al. - -\emph default - -\begin_inset CommandInset citation -LatexCommand citep -key "Borlido2019" -literal "false" - -\end_inset - -. 
- For a full description of the dataset, we refer the reader to the original - article. - -\end_layout - -\begin_layout Standard -The reference authors reported and analyzed relative errors, but as there - is a large range of band gaps in this set this causes a dispersion of relative - errors over six orders of magnitude, and an unsuitable distortion of the - error distributions, with large relative errors for small band gaps, and - small relative errors for large band gaps. - It is true that for some methods ( -\emph on -e.g. -\emph default -, LDA) the errors increase with the value of the band gap, but this is due - mostly to a systematic deviation (trend), not an increase in the dispersion - of the errors. - In consequence, we chose to treat here the 'absolute' errors, as defined in - Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:errors-def" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -The reference authors report in their Table 1 Kendall and Pearson correlation - coefficients between the calculated and reference values. - First, one should recall that correlation coefficients are not reliable - performance indicators -\begin_inset CommandInset citation -LatexCommand citep -key "Bland1986" -literal "false" - -\end_inset - -. - At most, they reveal a linear (Pearson) or monotonic (Kendall) association - between two variables -\begin_inset Formula $X$ -\end_inset - - and -\begin_inset Formula $Y$ -\end_inset - -, but not their proximity to the identity ( -\begin_inset Formula $X=Y$ -\end_inset - -) line. - Nevertheless, the notable difference (0.1-0.2) between both correlation - estimators, for each method, points to the presence of outliers and/or - non-linear relationships. -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -Borlido -\emph on -et al. 
- -\emph default - -\begin_inset CommandInset citation -LatexCommand citep -key "Borlido2019" -literal "false" - -\end_inset - - discuss the uncertainties on the reference band gaps in their dataset and - estimate them at a few tenths of eV. - Without more detailed information, we assume that this represents a uniform - uncertainty for the dataset. -\end_layout - -\begin_layout Paragraph -Correlations. -\end_layout - -\begin_layout Standard -One sees in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:borlido1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - that across the spectrum of methods, all correlation coefficients are positive, - and can reach very large values, up to 0.998. - -\color orange - -\color inherit -Only about 30 -\begin_inset space \thinspace{} -\end_inset - -% of the dataset pairs have correlation coefficients below 0.6, involving - notably PBE0_mix and HSE_mix. - If the error sets are dominated by method errors ( -\emph on -i.e. -\emph default -, there is no large reference data uncertainty, nor outliers), -\color red - -\color inherit -the correlation matrix can be used to infer a clustering of methods, describing - the relationships of the methods for the current property/dataset. - Error sets with large correlation coefficients are related by a linear - or monotonic transformation and the corresponding methods are clustered - together. - The presence of well delimited clusters indicates that the error sets are - not dominated by reference data errors. - From the correlation matrix, the clusters would be (HLE16, HLE16+SOC), - (BJ, SCAN, LDA, PBE, PBE_SOL, LDA+SOC, PBE+SOC), (HSE_mix, PBE0_mix) and - (HSE06, PBE0). - mBJ and HSE14 stay alone. - -\color teal - The clustering observed seems to find blocks that correspond to physical - intuition: LDA, PBE, SCAN, ... - all have an electron-gas background. 
- This is relaxed for HLE16, which differs from HLE16+SOC only by taking into - account spin-orbit coupling. - These methods are further decoupled from hybrid methods (PBE0, HSE06). - -\end_layout - -\begin_layout Standard -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Borlido2019_Cormat_Errors_Spearman.png - lyxscale 40 - width 50text% - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:borlido1" - -\end_inset - -Case BOR2019: rank correlation between error sets. - The methods are ordered by a clustering algorithm using the complete linkage - method -\begin_inset CommandInset citation -LatexCommand cite -key "Defays1977" -literal "false" - -\end_inset - - -\color teal -implemented in the -\family typewriter -R -\family default - function -\family typewriter - hclust -\family default -\color inherit - -\begin_inset CommandInset citation -LatexCommand cite -key "CiteR" -literal "false" - -\end_inset - -. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Statistics. -\end_layout - -\begin_layout Standard -The values are reported in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:borlido" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - Although mBJ presents the smallest MUE (0.50(2) -\begin_inset space \thinspace{} -\end_inset - -eV), the value for HSE06 is very close (0.53(5) -\begin_inset space \thinspace{} -\end_inset - -eV), and one cannot exclude that the difference is due to a mere sampling - effect ( -\begin_inset Formula $p_{g}\simeq2P_{inv}=0.16$ -\end_inset - -). 
- Besides, HSE06 is the only method with a notably non-zero -\begin_inset Formula $P_{inv}$ -\end_inset - - value for the MUE. - mBJ is also the method with the smallest -\begin_inset Formula $Q_{95}$ -\end_inset - -, and no other method is able to challenge this rank. - mBJ has also the largest MSIP, but its value is moderate (0.7), indicating - that mBJ does not provide a full systematic improvement over (some of) - the other methods. - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -When considering -\begin_inset Formula $Q_{95}$ -\end_inset - -, ignoring the correlation would lead us to conclude that HSE06 is still - a contender to mBJ ( -\begin_inset Formula $p_{unc}=0.1)$ -\end_inset - -, but -\begin_inset Formula $p_{g}$ -\end_inset - - tells us the opposite, -\emph on -i.e. - -\emph default - that mBJ has a significantly better -\begin_inset Formula $Q_{95}$ -\end_inset - - (1.41(7) -\begin_inset space \thinspace{} -\end_inset - -eV) than HSE06 (1.7(2) -\begin_inset space \thinspace{} -\end_inset - -eV). 
-\end_layout - -\end_inset - - -\begin_inset Float table -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -eV -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -eV -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout 
- -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -eV -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -eV -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -LDA -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.17(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.2(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.25(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.84(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.87(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.41(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -LDA + SOC -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.24(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.3(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - 
-\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.16(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.86(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.92(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.38(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.05(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.0(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.41(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.82(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.76(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.40(3) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE + SOC -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.12(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.0(2) -\end_layout - -\end_inset - 
- -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.30(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.83(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.82(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.37(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE_SOL -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.12(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.1(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.30(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.83(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.82(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.42(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -HLE16 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.60(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - 
- -\begin_inset Text - -\begin_layout Plain Layout -1.9(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.66(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.49(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.44(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.23(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -HLE16 + SOC -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.61(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.0(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.65(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.49(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.48(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.25(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -BJ -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.79(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.3(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.55(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.75(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.49(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.31(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -mBJ -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.50(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -1.41(7) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.69(2) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -SCAN -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset 
Text - -\begin_layout Plain Layout -0.81(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.4(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.55(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.74(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.53(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.30(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -HSE06 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.53(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.08 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.7(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.68(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.52(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.28(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.25(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -HSE14 -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.57(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.8(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.63(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.56(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.38(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.33(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -HSE06_mix -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.64(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.0(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.60(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.58(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.51(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.36(3) -\end_layout - 
-\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.78(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.8(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.44(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.72(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.57(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.46(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE0_mix -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.82(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.4(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.47(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.66(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.67(4) -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.37(3) -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:borlido" - -\end_inset - -Case BOR2019: Absolute error statistics and -\begin_inset Formula $p$ -\end_inset - --values for the comparison with respect to the DFA with the smallest MUE - (mBJ). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -The SIP values for mBJ lie between 0.49 and 0.86. - The latter value is against LDA+SOC, which means that for 14 -\begin_inset space \thinspace{} -\end_inset - -% of the systems, LDA+SOC achieves smaller absolute errors than mBJ, despite - its poor scores. - Interestingly, small values of 0.49-0.52 are also observed against HLE16, - HLE16+SOC and HSE06, indicating a notable risk of performance loss when - switching from one of these methods to mBJ. -\end_layout - -\begin_layout Standard -As seen previously, when going from LDA to mBJ (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:Delta-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), one has less than 15 -\begin_inset space \thinspace{} -\end_inset - -% chance to perform better using LDA, and the mean gain more than doubles - the mean loss. - By contrast, the comparison of mBJ to HSE06 (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:borlido2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) is an example of undecidability: the MUE difference is not significantly - different from zero, and one has as much to lose as to gain by switching - between both methods. 
-\end_layout - -\begin_layout Standard -The SIP matrix (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:borlidoSIP" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) provides a convenient summary of these observations. - The mBJ line is mostly reddish with white spots indicating neutral comparisons. - In contrast, the LDA+SOC line is fully bluish, indicating that it is dominated - by all other methods. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Borlido2019_compareECDF2.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Borlido2019_deltaECDF2.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:borlido2" - -\end_inset - - Case BOR2019 - absolute error statistics: (a) ECDF of the absolute errors; - (b) ECDF of the difference of absolute errors for mBJ and HSE06. - See Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:Delta-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - for details. - -\color teal -The orange band depicts a reasonable level of uncertainty in the dataset - (0.2 -\begin_inset space \thinspace{} -\end_inset - -eV). 
-\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Borlido2019_SIPHeatmap.png - lyxscale 35 - width 45text% - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:borlidoSIP" - -\end_inset - -Case BOR2019: SIP matrix. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Ranking. -\end_layout - -\begin_layout Standard -Ranking probability matrices for the MUE, -\begin_inset Formula $Q_{95}$ -\end_inset - - and MSIP are presented in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:borlidoRPmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - They illustrate the previous results and show that ranking by MUE beyond - the second place becomes uncertain. - This is even more notable for -\begin_inset Formula $Q_{95}$ -\end_inset - -. - The MSIP ranking selects the same group of five methods as the MUE ranking, - with some inversions. - At the opposite end of the ranking, a tail group of five methods is rather well ascertained. - -\end_layout - -\begin_layout Standard -These matrices are a convenient tool to visualize the impact of dataset - size on the ranking quality. - We estimated them for reduced error sets ( -\begin_inset Formula $N=235$ -\end_inset - - and -\begin_inset Formula $N=100$ -\end_inset - -), sampled randomly from the original one. - The impact is clearly visible, as the diagonal contributions get weaker - when -\begin_inset Formula $N$ -\end_inset - - decreases. - For the MUE, the block of ranks 1 and 2 is quite robust, but the situation - deteriorates for the upper ranks. 
- For -\begin_inset Formula $Q_{95}$ -\end_inset - -, the first place of mBJ is very stable, but the upper ranks become very - uncertain, up to the last ranks for -\begin_inset Formula $N=100$ -\end_inset - -. - As for the MUE, the MSIP ranking suffers from the reduced datasets, but - the head group of five methods is well preserved. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Borlido2019_figRanks_mue_levels.png - lyxscale 20 - width 32col% - BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Borlido2019_figRanks_q95hd_levels.png - lyxscale 20 - width 32col% - BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Borlido2019_figRanks_msip_levels.png - lyxscale 20 - width 32col% - BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Borlido2019_figRanks_mue_levels_sel.png - lyxscale 20 - width 32col% - BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Borlido2019_figRanks_q95hd_levels_sel.png - lyxscale 20 - width 32col% - BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Borlido2019_figRanks_msip_levels_sel.png - lyxscale 20 - width 32col% - 
BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Borlido2019_figRanks_mue_levels_sel100.png - lyxscale 20 - width 32col% - BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Borlido2019_figRanks_q95hd_levels_sel100.png - lyxscale 20 - width 32col% - BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Borlido2019_figRanks_msip_levels_sel100.png - lyxscale 20 - width 32col% - BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:borlidoRPmat" - -\end_inset - -Case BOR2019: ranking probability matrices for the full dataset (top row, - -\begin_inset Formula $N=471$ -\end_inset - -), and for reduced sets ( -\begin_inset Formula $N=235$ -\end_inset - - and -\begin_inset Formula $100$ -\end_inset - -). - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset Newpage clearpage -\end_inset - - -\end_layout - -\begin_layout Subsection -NAR2019 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Narayanan2019" - -\end_inset - - -\end_layout - -\begin_layout Standard -The dataset contains the calculated enthalpies of formation by G4MP2 for - 469 molecules having experimental values with small uncertainty (Pedley - test set) -\begin_inset CommandInset citation -LatexCommand cite -key "Narayanan2019" -literal "false" - -\end_inset - -. 
- The G4MP2 values are compared with those of B3LYP, M06-2X and -\begin_inset Formula $\omega$ -\end_inset - -B97X-D. - -\end_layout - -\begin_layout Paragraph -Correlations. -\end_layout - -\begin_layout Standard -The most remarkable feature in the correlation matrices in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:narayanan1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - is the decorrelation of G4MP2 errors from the other error sets. - For the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -, weak positive correlations appear, more notably for -\begin_inset Formula $Q_{95}$ -\end_inset - -. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Narayanan2019_CorrMat_Errors_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 0bp 100bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Narayanan2019_CorrMat_MUE_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 0bp 100bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Narayanan2019_CorrMat_Q95_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 0bp 100bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:narayanan1" - -\end_inset - -Case NAR2019 - rank correlation matrices: (a) Errors; (b) MUE; (c) -\begin_inset Formula 
$Q_{95}$ -\end_inset - -. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Statistics. -\end_layout - -\begin_layout Standard -The statistics reported in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:narayanan" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - show the supremacy of G4MP2 over the three DFAs for all statistics. - Narayanan -\emph on -et al. - -\emph default - -\begin_inset CommandInset citation -LatexCommand cite -key "Narayanan2019" -literal "false" - -\end_inset - - claim an -\begin_inset Quotes eld -\end_inset - -accuracy -\begin_inset Quotes erd -\end_inset - - -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -The MUE is sometimes improperly used to characterize the -\emph on -accuracy -\emph default - of a method, which cannot be the case when error distributions are not - zero-centered normal -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,Pernot2018" -literal "false" - -\end_inset - -. -\end_layout - -\end_inset - - (MUE) of 0.79 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol with G4MP2. - However, a look at the absolute error ECDFs (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:narayanan2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)) shows that for G4MP2, there is still a probability of about 20 -\begin_inset space \thinspace{} -\end_inset - -% that the absolute errors exceed 1 kcal/mol, and of about 5 -\begin_inset space \thinspace{} -\end_inset - -% that they exceed 2.2 kcal/mol. 
- -\begin_inset Float table -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center - -\size small -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -G4MP2 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.79(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -2.21(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.81(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -B3LYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4.0(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -9.3(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.0 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.22(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.89(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --3.7(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.52(7) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -M06-2X -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.71(10) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -6.1(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.37(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.83(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --2.5(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.82(7) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $\omega$ -\end_inset - -B97X-D -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.85(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset 
Text - -\begin_layout Plain Layout -5.2(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.59(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.73(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.7(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.62(5) -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:narayanan" - -\end_inset - -Case NAR2019: same as Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:pernot" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - for case Narayanan2019. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -G4MP2 presents a high degree of systematic improvement over the three DFAs - (MSIP = 0.81). - Nonetheless, there is about 25 -\begin_inset space \thinspace{} -\end_inset - -% probability that -\begin_inset Formula $\omega$ -\end_inset - -B97X-D performs better, but with a rather small value of ML (0.62 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol), when compared to the chemical accuracy (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:narayanan2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(d)). 
- In contrast, the mean gain when using G4MP2 instead of -\begin_inset Formula $\omega$ -\end_inset - -B97X-D is about -1.7 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol for about 75 -\begin_inset space \thinspace{} -\end_inset - -% of the systems. - The advantage of G4MP2 over B3LYP is more spectacular (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:narayanan2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(c)). - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Narayanan2019_compareECDF.png - lyxscale 20 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Narayanan2019_SIPHeatmap.png - lyxscale 20 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Narayanan2019_deltaECDF.png - lyxscale 20 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Narayanan2019_deltaECDF2.png - lyxscale 20 - width 32text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:narayanan2" - -\end_inset - - Case NAR2019: (a) ECDF of the absolute errors; (b) SIP matrix; (c,d) ECDF - of the difference of absolute errors of B3LYP and -\begin_inset Formula $\omega$ -\end_inset - -B97X-D with respect to G4MP2. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Ranking. -\end_layout - -\begin_layout Plain Layout -All the ranking probability matrices are diagonal (not shown). 
- There is no risk of inversion, a conjunction of the use of a large dataset - and a small set of representative methods. - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -CAL2019 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Caldeweyher2019" - -\end_inset - - -\end_layout - -\begin_layout Standard -The impact of an atomic-charge dependent London dispersion correction (D4 - model) has been evaluated by Caldeweyher -\emph on -et al. - -\emph default - -\begin_inset CommandInset citation -LatexCommand cite -key "Caldeweyher2019" -literal "false" - -\end_inset - - on a large series of datasets. - From those, we selected one of the largest ones, -\emph on -i.e. -\emph default -, the reference energies for the MOR41 transition metal reaction benchmark - set -\begin_inset CommandInset citation -LatexCommand cite -key "Dohm2018" -literal "false" - -\end_inset - -, available as Tables -\begin_inset space \thinspace{} -\end_inset - -14-18 in the Supplementary Information of the reference article. -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -Reproducibility note: these data are inconsistent with the results reported - in Fig. -\begin_inset space \thinspace{} -\end_inset - -9 of the reference article and the subsequent discussion. - We contacted the corresponding author (S. - Grimme) who kindly sent us a corrected version of the Supplementary Information. - -\end_layout - -\end_inset - - The reference data are calculated values, with a priori no significant - numerical uncertainty. - The London dispersion corrections have been tested on a series of 10 DFAs. - Note that the nomenclature used here for the corrections is the one provided - in the SI table, which differs somewhat from the one used in the reference - article. - -\end_layout - -\begin_layout Paragraph -Statistics. 
-\end_layout - -\begin_layout Standard -The results are reported in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:caldeweyher" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, where DFT-D3 has been taken as reference throughout. - The aim here is to check if DFT-D4 brings significant differences. - It is notable that with a set of size 41, the sampling uncertainty is rather - large for both statistics (typically on the second or first digit). - Nevertheless, significant MUE improvements are observed when passing from - DFT-D3 to DFT-D4, except for revPBE and PW6B95. - In the latter case, the better MUE of the D3 calculations, noted by the - reference authors, might be due to a random effect of dataset selection. - Based on -\begin_inset Formula $Q_{95}$ -\end_inset - - the improvements due to D4 are not significant, except for DOD-PBE, DSD-PBE - and RPBE. - Globally, DFT-D4 improves the MUE, but does not reduce the risk of large - errors. 
- -\begin_inset Float table -placement p -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size small -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size small -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size small -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size small -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size small -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size small -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size small -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size small -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size small -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -kcal/mol -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -kcal/mol -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -DOD-PBE-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -2.1(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -7(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.63(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -DOD-PBE-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -2.1(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -8(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.65(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.44(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --0.28(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.24(5) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -DOD-PBE-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -3.5(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -10(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.13(4) -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.83(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --1.8(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.8(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -DSD-PBE-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -2.9(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -11(3) -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.35(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -DSD-PBE-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -2.9(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -11(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.35(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -DSD-PBE-D3 -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -3.7(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.29(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.71(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --1.5(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.7(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset 
- - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -B3LYP-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -4.2(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -11(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.08 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.56(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.41(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --0.22(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.21(3) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -B3LYP-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -4.2(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.01 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -11(3) -\end_layout - -\end_inset - - 
-\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.11 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.56(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -B3LYP-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -4.8(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -13(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.26(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.71(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --1.1(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.8(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout 
- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PBE0-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -2.3(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -8(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.08 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.30(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - 
- -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PBE0-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -2.3(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -8(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.08 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.30(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PBE0-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -2.6(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -8(1) -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.29(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.61(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --0.7(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.4(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PW6B95-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.2(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize 
-0.03 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -7.9(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.33 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.35(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.56(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --1.6(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -1.0(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PW6B95-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.0(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.11 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -7.8(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.34 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.48(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.54(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize 
--1.3(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -1.0(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PW6B95-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -2.7(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -7.4(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.55(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -CAM-B3LYP-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.7(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -9(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.05 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.38(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -CAM-B3LYP-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.7(4) -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -9(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.05 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.38(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -CAM-B3LYP-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -4.3(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -10(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.20(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.76(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size 
footnotesize --0.8(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.5(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -revPBE-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.3(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.10 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.33 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text 
- -\begin_layout Plain Layout - -\size footnotesize -0.43(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.54(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --0.27(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.28(6) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -revPBE-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.3(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.07 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.38 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.54(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -revPBE-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.8(6) -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.46(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.54(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --2.0(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -1.3(3) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - 
-\size footnotesize -M06L-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -5.1(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -13(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.10 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.35(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -M06L-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -5.1(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -13(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.10 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.35(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -M06L-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -5.5(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -14(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.22(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.71(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --0.7(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.5(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain 
Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PBE-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.5(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.32 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.45(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.51(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --0.20(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.16(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\size footnotesize -PBE-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.4(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.46 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.60(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PBE-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -3.9(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.30(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.68(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --1.0(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.5(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -RPBE-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.4(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\series bold -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.48(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -RPBE-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.4(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.48(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\size footnotesize -RPBE-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -8.3(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -20(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.05(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.95(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --5.3(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -2(1) -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:caldeweyher" - -\end_inset - -Case CAL2019: same as Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:pernot" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - DFT-D3 has been taken as reference throughout. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -Let us consider several examples with the SIP approach. 
-\end_layout - -\begin_layout Itemize - -\series bold -PBE0-Dn -\series default -. - There is a small MUE decrease using D4 (from 2.6 to 2.3), no effect on -\begin_inset Formula $Q_{95}$ -\end_inset - -. - Inspection of Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:caldeweyher1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a) shows that the 95 -\begin_inset space \thinspace{} -\end_inset - -% confidence interval (CI) for the SIP value of 0.61 for PBE0-D4-ATM over - PBE0-D3 does not exclude the neutral value (0.5), with a tiny advantage - of the mean gain over the mean loss. - One can note also that, despite their large error bars, the small MUE differenc -e between these two methods is significantly different from 0 (its 95 -\begin_inset space \thinspace{} -\end_inset - -% confidence interval excludes 0), an effect of the correlation between - error sets. - -\end_layout - -\begin_layout Itemize - -\series bold -PW6B95-Dn -\series default -. - This case is an inversion of the previous one, where the confidence interval - on the SIP value of 0.4 (disadvantaging D4) does not exclude the neutral - value, and the CI on the MUE difference does not exclude 0. - One cannot firmly conclude that the D3 version performs better than the - D4 ones for this DFA. -\end_layout - -\begin_layout Itemize - -\series bold -RPBE-Dn -\series default -. - For this case, one has a rare instance where D4 improves almost systematically - over D3, with a SIP of 0.95(3), and a mean gain overwhelming the mean loss. -\end_layout - -\begin_layout Standard -Except for RPBE-Dn, where the SIP value of D4 over D3 is about 0.95, and - DOD-PBE ( -\begin_inset Formula $SIP=0.83$ -\end_inset - -), all the estimated SIP values lie near or below 0.75, down to 0.45, meaning - that there is no systematic improvement when passing from D3 to D4. 
- In several cases, the uncertainty due to the limited set size does not - allow one to conclude clearly. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Caldeweyher2019_PBE0_deltaECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Caldeweyher2019_PW6B95_deltaECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Caldeweyher2019_RPBE_deltaECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:caldeweyher1" - -\end_inset - -Case CAL2019: SIP plots. - The orange band depicts the chemical accuracy (1 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Ranking. -\end_layout - -\begin_layout Standard -Considering that both DFT-D4 options are mostly indiscernible, we built - global ranking probability matrices for the DFT-D3 and DFT-D4-ATM data. - The results are reported in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:caldewheyerRPmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(top). - Although the rankings of the Dn options for each DFA are mostly unambiguous, - a global ranking is clearly very uncertain. - Based on the MUE, DOD-PBE-D4-ATM and PBE0-D4-ATM would share the leading - places. - Beyond that, the situation is utterly scrambled, the only clear point being - the last ranks for M06-L-D3 and RPBE-D3. - The picture based on -\begin_inset Formula $Q_{95}$ -\end_inset - - is even less well defined, with no clear leading method within a group - of five.
- The MSIP ranking is akin to the MUE ranking. -\end_layout - -\begin_layout Standard -If one restricts the methods to DFT-D4-ATM (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:caldewheyerRPmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - bottom), the situation is slightly better defined for the leading and trailing - places for the three scores, but remains largely undecided for the intermediate - ranks. - This illustrates how, for a given dataset, the uncertainty in ranking is - also affected by the number of methods to be ranked. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Caldeweyher2019_figRanks_mue_levels.png - lyxscale 20 - width 32col% - BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Caldeweyher2019_figRanks_q95hd_levels.png - lyxscale 20 - width 32col% - BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Caldeweyher2019_figRanks_msip_levels.png - lyxscale 20 - width 32col% - BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Caldeweyher2019_figRanks_mue_levels_D4.png - lyxscale 20 - width 32col% - BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Caldeweyher2019_figRanks_q95hd_levels_D4.png - lyxscale 20 - width 32col% - BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Caldeweyher2019_figRanks_msip_levels_D4.png - lyxscale 20 - width 32col% - BoundingBox 0bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent
-\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:caldewheyerRPmat" - -\end_inset - -Case CAL2019: ranking probability matrices for (top) DFT-D3 and DFT-D4-ATM - methods, and (bottom) DFT-D4-ATM methods only. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset Newpage clearpage -\end_inset - - -\end_layout - -\begin_layout Subsection -JEN2018 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Jensen2018" - -\end_inset - - -\end_layout - -\begin_layout Standard -This dataset contains non-covalent interaction energies estimated by M06-L - with six different basis sets for 66 systems in the S66 dataset -\begin_inset CommandInset citation -LatexCommand cite -key "Rezac2011,Rezac2011_Erratum" -literal "false" - -\end_inset - -. - This is a part of the results reported in Table 8 of a recent article by - Jensen -\begin_inset CommandInset citation -LatexCommand cite -key "Jensen2018" -literal "false" - -\end_inset - -, and available as Supplementary Information to this article. - This dataset was used by Jensen to study the impact of error cancellations - when using standard or optimized medium-sized basis sets. - Six basis sets are considered (pop2 = 6-31G(d,p), pop3 = 6-311G(2df,2pd), - pcseg-1, pcseg-4, pop2-opt and pcseg1-opt), where the '-opt' ones have - optimized contraction coefficients with respect to the reference data. -\end_layout - -\begin_layout Paragraph -Correlations. -\end_layout - -\begin_layout Standard -The error sets of the '-opt' methods are practically uncorrelated to the - other sets (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:jensenCorrmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)), and in the remaining methods, pcseg4 errors are anti-correlated with - the other ones. 
- A striking feature of this dataset is that this negative correlation persists - for the MUE, contradicting the trends observed in Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Covariance-of-scores" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - Otherwise, the correlations globally weaken for -\begin_inset Formula $Q_{95}$ -\end_inset - -, except for the pop2/pop3 and pcseg1/pcseg1-opt cases, for which the correlatio -n is stronger than the one between the error sets. - -\begin_inset Float figure -placement tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Jensen2018_CorrMat_Errors_Spearman.png - lyxscale 20 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Jensen2018_CorrMat_MUE_Spearman.png - lyxscale 20 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Jensen2018_CorrMat_Q95_Spearman.png - lyxscale 20 - width 32text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:jensenCorrmat" - -\end_inset - -Case JEN2018: rank correlation matrices. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Statistics. -\end_layout - -\begin_layout Standard -The statistics in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:jensen" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - show the strong impact of basis-set optimization: both optimized basis - sets provide comparable results for the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -.
- All statistics show that the ranking between both '-opt' methods is not - strict. -\begin_inset Float table -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center - -\size small -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kJ/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kJ/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kJ/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kJ/mol -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -pop2 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.9(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -7.2(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.35(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.77(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --2.9(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.8(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -pop3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.4(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -6.4(7) -\end_layout - -\end_inset - - 
-\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.47(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.74(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --2.3(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.8(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -pcseg1 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.5(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -5.6(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.42(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.76(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --2.3(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.9(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -pcseg4 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.5(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - 
-\begin_inset Text - -\begin_layout Plain Layout -4.8(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.33(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.89(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.8(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.6(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -pop2-opt -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.06(10) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.05 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.6(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.24 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.67(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.62(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.66(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.65(9) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -pcseg1-opt -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.90(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -2.5(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.76(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:jensen" - -\end_inset - -Case JEN2018: same as Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:pernot" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -They both also stand out by their MSIP, with a slight advantage for pcseg1-opt. - Once more, the importance of error cancellations stands out through the - medium values of the SIP of pcseg1-opt over the other cases. - The strongest improvement is 0.9 over pcseg4, the smallest 0.6 over pop2-opt. - The plots in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:jensen1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - illustrate these features. 
- The SIP matrix clearly shows the medium supremacy of the optimized basis - sets, and a slight advantage of pcseg1-opt over pop2-opt. - The major gain when going from pop2 to pop2-opt is visible in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:jensen1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(c) where the medium SIP (~0.7) is compensated by the very small mean loss - (0.6 -\begin_inset space \thinspace{} -\end_inset - -kJ/mol). - In contrast, Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:jensen1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(d) shows that the improvement of pcseg1-opt over pop2-opt is marginal, - with SIP values close to the neutral value (0.5) and symmetrical MG and - ML values. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Jensen2018_compareECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Jensen2018_SIPHeatmap.png - lyxscale 25 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Jensen2018_deltaECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Jensen2018_deltaECDF2.png - lyxscale 25 - width 32text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:jensen1" - -\end_inset - -Case JEN2018: (a) ECDF of the absolute errors; (b) SIP matrix; (c,d) ECDF - of the difference of absolute errors of pop2 and 
pcseg1-opt with respect - to pop2-opt. - The orange bar represents a chemical accuracy of 1 -\begin_inset space \thinspace{} -\end_inset - -kJ/mol. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Ranking. -\end_layout - -\begin_layout Standard -The leading position of the '-opt' methods is solid and confirmed by our - three scores (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:jensenRanking" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Jensen2018_figRanks_mue_levels.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Jensen2018_figRanks_q95hd_levels.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Jensen2018_figRanks_msip_levels.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:jensenRanking" - -\end_inset - -Case JEN2018: ranking probability matrices. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -DAS2019 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Das2019" - -\end_inset - - -\end_layout - -\begin_layout Standard -A small set of 24 dielectric constants for 3D metal oxides has been reported - by Das -\emph on -et al. 
- -\emph default - -\begin_inset CommandInset citation -LatexCommand cite -key "Das2019" -literal "false" - -\end_inset - - in their Table 3. - One of the experimental values being unknown, this limits the dataset to - 23 values. - Experimental uncertainties are not specified. - The predictions by six DFAs are reported, three global hybrids (PBE0, B3LYP - and DD-B3LYP) and three range-separated hybrids (SC-BLYP, DD-SC-BLYP and - DD-CAM-B3LYP). - This is a small dataset, below the standards required for low type I errors - (false positive) in the comparison of MUE ( -\begin_inset Formula $N>30$ -\end_inset - -) and -\begin_inset Formula $Q_{95}$ -\end_inset - - ( -\begin_inset Formula $N>60$ -\end_inset - -) (Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Type-I-error" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). -\end_layout - -\begin_layout Paragraph -Correlations. -\end_layout - -\begin_layout Standard -The correlation matrices of the errors, MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - have uniformly strongly positive elements (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:dasCorrmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - --top). - This is an unusual situation when compared to the previous cases. - Knowing that correlation coefficients are sensitive to outliers (even if - rank correlations are a little more robust), we explored the dataset for - outliers. - A parallel plot (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:dasCorrmat-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) of the scaled and centered error sets makes it possible to identify systems - that deviate significantly from the core distribution. 
- Two such systems exist for all methods: -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -ce{BiVO4} -\end_layout - -\end_inset - - and -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -ce{Cu2O} -\end_layout - -\end_inset - -. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Das2019_ParPlot.png - lyxscale 20 - width 45text% - BoundingBox 50bp 0bp 1100bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:dasCorrmat-1" - -\end_inset - -Case DAS2019: parallel plot of scaled and centered error sets to identify - outliers. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - After removal of these two points, the correlation matrix for the errors - is slightly relaxed (the smallest correlation coefficient decreases from - 0.81 to 0.74), but those for MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - are visibly more affected (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:dasCorrmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - --bottom). - In fact, the parallel plot reflects the strong correlations between all - error sets (many quasi-parallel lines), except for DD-CAM-B3LYP. - The pruned dataset ( -\begin_inset Formula $N=21$ -\end_inset - -) is used in the following. 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Das2019_CorrMat_Errors_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Das2019_CorrMat_MUE_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Das2019_CorrMat_Q95_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Das2019_CorrMat_Errors_Spearman_Pruned.png - lyxscale 20 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Das2019_CorrMat_MUE_Spearman_Pruned.png - lyxscale 20 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Das2019_CorrMat_Q95_Spearman_Pruned.png - lyxscale 20 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - -\end_inset - - -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:dasCorrmat" - -\end_inset - -Case DAS2019: rank correlation matrices. 
- (top) original data set ( -\begin_inset Formula $N=23$ -\end_inset - -); (bottom) after removal of two outliers ( -\begin_inset Formula $N=21$ -\end_inset - -). - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Statistics. -\end_layout - -\begin_layout Standard -Considering the small size of the sample, few clear-cut conclusions are - possible. - Only DD-CAM-B3LYP stands out significantly, by its MUE, -\begin_inset Formula $Q_{95}$ -\end_inset - - and MSIP values (Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:das" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - In contrast, although its MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - values are not distinguishable from those of PBE0, B3LYP, SC-BLYP and DD-SC-BLY -P, DD-B3LYP is the worst performer of the group based on the SIP statistics. - -\begin_inset Float table -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center - -\size small -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSIP -\end_layout 
- -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\color teal -a.u. -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\color teal -a.u. -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\color teal -a.u. -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\color teal -a.u. 
-\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.66(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.6(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.47(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.76(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.44(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.19(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -B3LYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.61(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.4(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.49(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.76(10) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.38(6) 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.21(6) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -DD-B3LYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.70(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.30(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.19(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.90(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.41(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.4(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -SC-BLYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.58(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.3(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.62(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout 
-0.76(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.36(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.22(7) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -DD-SC-BLYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.68(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.23(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.29(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.90(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.39(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.4(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -DD-CAM-B3LYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.36(6) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.83(7) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.82(8) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:das" - -\end_inset - -Same as Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:pernot" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - for case DAS2019. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -The latter two methods are clearly identifiable in the SIP matrix (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:das" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)), with a full reddish line for DD-CAM-B3LYP, and a full blueish line - for DD-B3LYP. - The impact of the small set size on this conclusion is illustrated in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:das" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b,c), where the ECDFs of the differences of absolute errors are plotted - for DD-CAM-B3LYP -\emph on -vs -\emph default -. - B3LYP and DD-B3LYP -\emph on -vs -\emph default -. - B3LYP. - Despite being very large, the error bars on the statistics make it possible - to validate these conclusions. - Any ranking of the remaining four DFAs would be unreliable. 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Das2019_SIPHeatmap_Pruned.png - lyxscale 25 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Das2019_deltaECDF_Pruned.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Das2019_deltaECDF2_Pruned.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:das" - -\end_inset - -Case DAS2019: (a) SIP matrix; (b) ECDF of the difference of absolute errors - of methods DD-CAM-B3LYP and B3LYP; (c) idem for DD-B3LYP and B3LYP. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Ranking. -\end_layout - -\begin_layout Standard -All ranking matrices confirm a solid leading place for DD-CAM-B3LYP (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:dasRanking-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - The MUE and MSIP rankings would then favor SC-BLYP and B3LYP, in disagreement - with the -\begin_inset Formula $Q_{95}$ -\end_inset - - ranking, for which the three DD-X methods have leading ranks. - An example of an -\begin_inset Formula $N'$ -\end_inset - --out-of- -\begin_inset Formula $N$ -\end_inset - - bootstrap ( -\begin_inset Formula $N'=N/3$ -\end_inset - -) is shown in the bottom row. - The uncertainties are slightly larger, notably for the -\begin_inset Formula $Q_{95}$ -\end_inset - - ranks above the first. 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Das2019_figRanks_mue_levels_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Das2019_figRanks_q95hd_levels_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Das2019_figRanks_msip_levels_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Das2019_figRanks_mue_levels_Pruned_M.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Das2019_figRanks_q95hd_levels_Pruned_M.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Das2019_figRanks_msip_levels_Pruned_M.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:dasRanking-1" - -\end_inset - -Case DAS2019 - ranking probability matrices: (top) -\begin_inset Formula $N$ -\end_inset - --out -\begin_inset space \thinspace{} -\end_inset - -of- -\begin_inset Formula $N$ -\end_inset - - bootstrap; (bottom) -\begin_inset Formula $N/3$ -\end_inset - --out -\begin_inset space \thinspace{} -\end_inset - -of- -\begin_inset Formula $N$ -\end_inset - - bootstrap. 
-\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset Newpage clearpage -\end_inset - - -\end_layout - -\begin_layout Subsection -THA2015 / WU2015 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Thakkar2015" - -\end_inset - - -\end_layout - -\begin_layout Standard - -\color teal -Thakkar -\emph on -et al. - -\emph default - -\begin_inset CommandInset citation -LatexCommand cite -key "Thakkar2015" -literal "false" - -\end_inset - - compiled a database of polarizabilities for 135 molecules, from triatomics - to 26-atoms systems. - The experimental data are given with their uncertainty, and computational - results are provided for 7 methods. - Data THA2015 for our study were extracted from Tables II-IV in the reference - article. - The raw errors present a dispersion increasing with the polarizability, - hence relative errors are used in the reference article and this study. - -\end_layout - -\begin_layout Standard - -\color teal -The relative uncertainties for the reference experimental data cover a large - range, from 0.09 -\begin_inset space \thinspace{} -\end_inset - -% to 12.4 -\begin_inset space \thinspace{} -\end_inset - -%, the median value is 1.7 -\begin_inset space \thinspace{} -\end_inset - -%. - The authors identified 8 outliers, and a total 32 systems in need or further - study. - The outliers do not contain the points with the extreme uncertainties, - so that, even after removal of the 32 problematic systems, the range of - relative uncertainties stays the same. - The dispersion of uncertainties would certainly justify the use of weighted - statistics. - This was not the choice of Thakkar -\emph on -et al. -\emph default -, and we proceed then with unweighted statistics, keeping in mind that the - results might be dominated by reference data errors instead of model errors. -\end_layout - -\begin_layout Standard - -\color teal -In a complementary study, Wu -\emph on -et al. 
- -\emph default - -\begin_inset CommandInset citation -LatexCommand cite -key "Wu2015b" -literal "false" - -\end_inset - - calculated polarizabilities for a set of 145 molecules with HF, MP2, CCSD(T) - and 34 DFAs. - In this study, CCSD(T) was used as reference to evaluate the other methods. - In the following, we select the subset of 7 methods common to both datasets - (WU2015). - This enables us to study the impact of the reference data (experimental - -\emph on -vs. - -\emph default - calculated) on the correlation and ranking matrices. - -\color inherit - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Weights. -\end_layout - -\begin_layout Plain Layout -First, let us consider the uncertainties on the relative errors. - For all methods, the Birge ratios (Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:Birge" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) exceed largely the upper limit of the admitted 95% confidence interval. - The contribution of model errors is therefore important, and one adopts - the scheme exposed above where the data are weighted as in Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:weights-IRWLS" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - -\end_layout - -\begin_layout Plain Layout -As shown earlier, weights play a role in the weighting scheme of statistics - only if their dispersion is large. - The risk is that the dataset becomes dominated by a few accurate values. - The logarithms of the weights for the seven methods are plotted in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkar3" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. 
- One can see that (1) all methods have similar weight sets; and (2) there - are no points with outstandingly large weights, but their distribution - has a long tail towards small values. - Ten systems have been tagged in the figure, which contribute negligibly - to the weighted statistics. - -\begin_inset Float figure -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename /home/pernot/Bureau/Andreas/1-Paper2019Comp/results/figs/Thakkar2015_weightsParPlot.png - lyxscale 40 - height 8cm - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:thakkar3" - -\end_inset - -Corrmat -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Correlations. -\end_layout - -\begin_layout Standard -The Pearson correlation matrix of the error sets (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkar2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)) is uniformly strongly positive. - The smallest CC value is 0.8. - To appreciate the role of data points with large deviations (outliers) - in these strong correlations, we removed a set of 8 outliers identified - by Thakkar -\emph on -et al. -\emph default - -\begin_inset CommandInset citation -LatexCommand cite -key "Thakkar2015" -literal "false" - -\end_inset - - ((Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkar2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b)). - Most of the correlations weaken notably. - For comparison, the rank correlation matrix was calculated for the full - dataset ((Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkar2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(c)). - This matrix is very similar to the one with outliers removed, illustrating - the better resilience of rank correlations to outliers. - Finally, the errors, MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - -\family roman -\series medium -\shape up -\size normal -\emph off -\bar no -\strikeout off -\xout off -\uuline off -\uwave off -\noun off -\color none -rank correlation -\family default -\series default -\shape default -\size default -\emph default -\bar default -\strikeout default -\xout default -\uuline default -\uwave default -\noun default -\color inherit - matrices were estimated on the pruned ( -\begin_inset Formula $N=127$ -\end_inset - -) dataset (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkar2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(d-f)). - The structure of the errors correlation matrix is transferred to the statistics -, with attenuated correlation intensities. 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Thakkar2015_CorrMat_Errors.png - lyxscale 20 - width 32col% - BoundingBox 0bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Thakkar2015_CorrMat_Errors_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 0bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Thakkar2015_CorrMat_Errors_Spearman.png - lyxscale 20 - width 32col% - BoundingBox 0bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Thakkar2015_CorrMat_Errors_Spearman_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 0bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Thakkar2015_CorrMat_MUE_Spearman_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 0bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Thakkar2015_CorrMat_Q95_Spearman_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 0bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:thakkar2" - -\end_inset - -Case THA2015 - correlation matrix: (a) 
Pearson correlation of the full data - set ( -\begin_inset Formula $N=135$ -\end_inset - -); (b) Pearson correlation of the pruned dataset ( -\begin_inset Formula $N=127$ -\end_inset - -); (c) Spearman/rank correlation of the full data set; (d): Errors rank - correlation; (e): MUE rank correlations; (f) -\begin_inset Formula $Q_{95}$ -\end_inset - - rank correlations. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -The error, MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - rank correlation matrices were also calculated for the full WU2015 dataset - (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkar2-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - In the absence of reference data uncertainties, MP2 errors are now weakly - anticorrelated to the other error sets, while all DFAs remain positively - correlated. -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Wu2015_CorrMat_Errors_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 0bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Wu2015_CorrMat_MUE_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 0bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Wu2015_CorrMat_Q95_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 0bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:thakkar2-1" - -\end_inset - -Case WU2015 - rank correlation matrix: (a) errors; (b) MUE; (c) -\begin_inset Formula $Q_{95}$ -\end_inset - -. 
-\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Statistics. - -\end_layout - -\begin_layout Standard -The values of MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - for the full THA2015 dataset are reported in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:thakkar" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - The MUE values agree with those of the reference article, but the uncertainty - bears on the second digit, showing that a third digit is essentially irrelevant. - The analysis of -\begin_inset Formula $P_{inv}$ -\end_inset - - for the MUE leads us to conclude that there is a group of four methods - (M11, M06-2X, LC- -\begin_inset Formula $\tau$ -\end_inset - -HCTH and MP2) with similar performances, which is confirmed by the comparison - of their empirical cumulated distribution functions -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - - (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkar1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - These ECDFs overlap over the whole range. - Besides, these methods cannot be discriminated on the basis of their -\begin_inset Formula $Q_{95}$ -\end_inset - - values, as it appears that all values are indiscernible. - These conclusions are unchanged when one removes the 8 outliers identified - by Thakkar -\emph on -et al. - -\emph default -(not shown). 
- -\begin_inset Float table -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -% -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -% -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - 
-\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -% -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -% -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -M11 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -3.1(3) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.36 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -10(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.58(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.47(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.4(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.16(10) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -M06-2X -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -3.2(3) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.08 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -10(2) -\end_layout - -\end_inset - - -\begin_inset 
Text - -\begin_layout Plain Layout -0.48 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.57(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.53(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.2(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.0(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $\omega$ -\end_inset - -B97 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.3(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -11(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.23 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.53(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.59(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.94(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.72(7) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -LC- -\begin_inset Formula $\tau$ -\end_inset - -HCTH -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -3.0(3) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain 
Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -10(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.32 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.59(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -HISS -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.8(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -10(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.35 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.34(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.72(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.62(10) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.5(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -LC- -\begin_inset Formula $\omega$ -\end_inset - -PBE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.9(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -11(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.26 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.31(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.78(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.39(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.2(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -MP2 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -3.2(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.21 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -11(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.35 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.56(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.45(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.3(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.8(1) -\end_layout - -\end_inset - - - - -\end_inset 
- - -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:thakkar" - -\end_inset - -Same as Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:pernot" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - for the case THA2015. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Thakkar2015_compareECDF_Pruned.png - lyxscale 25 - width 32col% - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Thakkar2015_compareECDF2_Pruned.png - lyxscale 25 - width 32col% - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:thakkar1" - -\end_inset - -Case THA2015: ECDFs of absolute relative errors: (a) methods with indiscernible - MUE; (b) other methods. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -The SIP matrix (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkarSIP" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)) for the THA2015 dataset reveals a leading group of four methods identical - to those identified above. - When passing to WU2015 (Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkarSIP" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b)), there is a better discrimination between methods, and MP2 presents - SIP values over all the other methods. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Thakkar2015_SIPHeatmap_Pruned.png - lyxscale 25 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Wu2015_SIPHeatmap.png - lyxscale 25 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:thakkarSIP" - -\end_inset - -SIP matrix: (a) case THA2015 ( -\begin_inset Formula $N=127$ -\end_inset - -); (b) case WU2015. - The methods are sorted by decreasing MSIP value. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Ranking. -\end_layout - -\begin_layout Standard -The ranking matrices are plotted in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkarRPmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - The top row concerns dataset THA2015. - The ranking probability matrices for the MUE confirm the problem seen above - for the three best methods. - It shows also that the rank of MP2 is quite ill-defined. - For -\begin_inset Formula $Q_{95,}$ -\end_inset - -, as expected, any ranking seems illusory. - The same matrices have been estimated after the removal of 8 outliers defined - above (Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkarRPmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - --middle row). - This has a negligible impact on the MUE ranking, but fully scrambles the - -\begin_inset Formula $Q_{95}$ -\end_inset - - one, M11 passing from the first to the last place, MP2 from the 8th to - the first, and so on. - In fact, ill-defined ranking matrices can be expected to be very sensitive - to any alteration of the dataset. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Thakkar2015_figRanks_mue_levels.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Thakkar2015_figRanks_q95hd_levels.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Thakkar2015_figRanks_msip_levels.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Thakkar2015_figRanks_mue_levels_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Thakkar2015_figRanks_q95hd_levels_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - 
-\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Thakkar2015_figRanks_msip_levels_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Wu2015_figRanks_mue_levels.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Wu2015_figRanks_q95hd_levels.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/Wu2015_figRanks_msip_levels.png - lyxscale 20 - width 32col% - BoundingBox 100bp 150bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:thakkarRPmat" - -\end_inset - -Ranking probability matrices: (top) case THA2015 full dataset ( -\begin_inset Formula $N=135$ -\end_inset - -); (middle) case THA2015 dataset pruned from 8 outliers ( -\begin_inset Formula $N=127$ -\end_inset - -); (bottom) case WU2015 ( -\begin_inset Formula $N=145$ -\end_inset - -). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -When considering the WU2015 dataset, the ranking matrices (Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkarRPmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - --bottom row) show much less dispersion, underlining the deleterious role - of experimental errors on ranking. - Note that there remains a notable uncertainty to rank -\begin_inset Formula $\omega$ -\end_inset - -B97, M11, M06-2X and LC- -\begin_inset Formula $\tau$ -\end_inset - -HCTH using -\begin_inset Formula $Q_{95}$ -\end_inset - -. - -\end_layout - -\begin_layout Standard -Depending on the reference dataset (experimental or CCSD(T)) one obtains - different rankings: LC- -\begin_inset Formula $\tau$ -\end_inset - -HCTH seems a better option to predict experimental values, when MP2 is a - better proxy for CCSD(T) calculations. - -\end_layout - -\begin_layout Subsection -ZAS2019 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Zas2019" - -\end_inset - - -\end_layout - -\begin_layout Standard -The effective atomization energies ( -\begin_inset Formula $E^{*}$ -\end_inset - -) for the QM7b dataset -\begin_inset CommandInset citation -LatexCommand cite -key "Montavon2013" -literal "false" - -\end_inset - -, for 7211 molecules up to 7 heavy atoms (C, N, O, S or Cl) are available - for several basis sets (STO-3g, 6-31g, and cc-pvdz), three quantum chemistry - methods (HF, MP2 and CCSD(T)) and four machine learning algorithms (CM-L1, - CM-L2, SLATM-L1 and SLATM-L2). - The data have been provided by Zaspel -\emph on -et al -\emph default -. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset citation -LatexCommand cite -key "Zaspel2019" -literal "false" - -\end_inset - -. 
- The machine learning methods have been trained over a random sample of - 1000 CCSD(T) energies (learning set), and the test set contains the prediction - errors for the 6211 remaining systems -\begin_inset CommandInset citation -LatexCommand cite -key "Zaspel2019" -literal "false" - -\end_inset - -. - We retain here only HF, MP2 and SLATM-L2 and compare their ability to predict - CCSD(T) values. -\end_layout - -\begin_layout Paragraph -Correlations. -\end_layout - -\begin_layout Standard -The error sets are essentially uncorrelated (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:zasCorrmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), whereas small positive correlations can be noted for the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -. - In this problem, it would therefore be possible to ignore correlations - when computing -\begin_inset Formula $P_{inv}$ -\end_inset - -. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Zaspel2019_CorrMat_Errors_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 0bp 100bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Zaspel2019_CorrMat_MUE_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 0bp 100bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Zaspel2019_CorrMat_Q95_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 0bp 100bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:zasCorrmat" - -\end_inset - -Case ZAS2019 - rank correlation matrices: : (a) errors; (b) MUE; (c) -\begin_inset Formula $Q_{95}$ 
-\end_inset - -. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Statistics. -\end_layout - -\begin_layout Standard -The values are reported in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:zas" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - There is a contrast between the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -. - SLATM-L2 and MP2 have close MUE values, with an above-threshold -\begin_inset Formula $p$ -\end_inset - --value ( -\begin_inset Formula $p_{g}\simeq2P_{inv}=0.06$ -\end_inset - -), and a slight advantage for SLATM-L2. - However, MP2 has a significantly smaller -\begin_inset Formula $Q_{95}$ -\end_inset - -. - As seen on the absolute errors ECDFs (Fig. -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:zas" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)), SLATM-L2 has indeed a pronounced tail of large errors. 
- -\begin_inset Float table -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center - -\size small -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -HF -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.38(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -6.1(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.283(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.743(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --2.03(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.50(5) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -MP2 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.31(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.03 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -3.35(5) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain 
Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.538(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.613(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.08(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.58(5) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -SLATM-L2 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -1.26(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4.7(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.678(5) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:zas" - -\end_inset - -Same as Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:pernot" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - for 
case ZAS2019. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -This case emphasizes the fact that similar values of the MUE can result - by chance from very distinct error distributions, and that no conclusion - should be drawn on the basis of MUE alone. - -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -The SIP matrix (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:zas" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b)) shows that SLATM-L2 presents a notable improvement probability ( -\begin_inset Formula $\sim$ -\end_inset - -0.75) over HF and a moderate one over MP2 ( -\begin_inset Formula $\sim0.61$ -\end_inset - -). - Even if SLATM-L2 has significantly better statistics than HF (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:zas" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(c)), there remains a 25 -\begin_inset space \thinspace{} -\end_inset - -% chance that the latter provides smaller absolute errors. - In most case studies presented above, the mean gain was larger in absolute - value than the mean loss. - In the comparison between SLATM-L2 and MP2, one observes the opposite: - by choosing SLATM-L2 over MP2 (Fig.
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:zas" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(d)), one has 61 -\begin_inset space \thinspace{} -\end_inset - -% chance to get better results, with a mean gain -\begin_inset Formula $\mathrm{MG}\simeq-1.1$ -\end_inset - - -\family roman -\series medium -\shape up -\size normal -\emph off -\bar no -\strikeout off -\xout off -\uuline off -\uwave off -\noun off -\color none - -\begin_inset space \thinspace{} -\end_inset - -kcal/mol -\family default -\series default -\shape default -\size default -\emph default -\bar default -\strikeout default -\xout default -\uuline default -\uwave default -\noun default -\color inherit -, and 39 -\begin_inset space \thinspace{} -\end_inset - -% chance to deteriorate the MP2 values with a mean loss -\begin_inset Formula $\mathrm{ML}\simeq1.6$ -\end_inset - - -\family roman -\series medium -\shape up -\size normal -\emph off -\bar no -\strikeout off -\xout off -\uuline off -\uwave off -\noun off -\color none - -\begin_inset space \thinspace{} -\end_inset - -kcal/mol -\family default -\series default -\shape default -\size default -\emph default -\bar default -\strikeout default -\xout default -\uuline default -\uwave default -\noun default -\color inherit -. 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Zaspel2019_compareECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Zaspel2019_SIPHeatmap.png - lyxscale 25 - width 32text% - BoundingBox 200bp 100bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/Zaspel2019_deltaECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/Zaspel2019_deltaECDF2.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:zas" - -\end_inset - -Case ZAS2019: : (a) ECDF of the absolute errors; (b) SIP matrix; (c,d) ECDF - of the difference of absolute errors of HF (c) and MP2 (d) with respect - to SLATM-L2. - The orange band represents the chemical accuracy (1 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Section -Discussion -\end_layout - -\begin_layout Subsection -Extracting data from articles and supplementary material -\end_layout - -\begin_layout Standard -The raw data of benchmark studies are important assets for the community, - and their accessibility and reusability are very important for intercomparison - studies or the development of alternative statistical analyses, as performed - in this study. - We found many benchmarking studies with practically inaccessible data, - -\emph on -i.e. -\emph default -, failing the 'A' (Accessible) and/or the 'R' (Reusable) of the FAIR principle - of Open Data -\begin_inset CommandInset citation -LatexCommand cite -key "Wilkinson2016" -literal "false" - -\end_inset - -. 
- Besides the trivial case of non-available data, we have stumbled on data - stored in complex databases and requiring non-trivial coding for their - extraction, or data stored in inappropriate formats, such as PDF (a Page - Description Format), instead of recognized machine-readable data storage - formats, such as CSV tables. - -\end_layout - -\begin_layout Standard -Note that for some of the cases we gathered here, we were able to extract - data from PDF articles or SI files, but not without some difficulty, involving - several steps of manual operations. - Typical problems for the data extraction from tables in PDF documents are: - excessive numerical truncation, empty cells or complex table mapping, typograph -ical ( -\begin_inset Formula $-$ -\end_inset - -) instead of numerical (-) minus sign, rotated tables, compact notations - for uncertainty (either 123(4) or 123 -\begin_inset Formula $\pm$ -\end_inset - -4), bibliographical references attached to the data (generally processed - as spurious decimals)... - Most of these features preclude automated data extraction and require error-pro -ne human intervention. -\end_layout - -\begin_layout Standard -So, unless the structure of the data is complex, and this should not be - the case for most benchmark studies, it is warmly recommended to use 'flat' - numerical tables stored in an open format, such as CSV, and to avoid to - put more than one information in each cell. - -\begin_inset Quotes eld -\end_inset - -Think Open, think FAIR ! -\begin_inset Quotes erd -\end_inset - - -\end_layout - -\begin_layout Subsection -Impact of dataset size -\end_layout - -\begin_layout Standard -The examples above have shown that data set size impact considerably the - ability to rank methods or to assert the impact of an improved method. - Data set size effect on the uncertainty of statistics is well known for - the mean value, and similar formulae can be derived for other statistics - under normality hypotheses. 
- However, the non-normality of error sets requires the use of numerical - methods, typically bootstrap sampling. - This enables to show how the usual benchmark statistics are affected by - sample size. - We have seen, for instance, -\color teal -that there is a notable probability to conclude erroneously that two -\begin_inset Formula $Q_{95}$ -\end_inset - - values are different when they are not (type I errors or false positive) -\color inherit - if -\begin_inset Formula $N<60$ -\end_inset - - (Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Type-I-error" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - Moreover, for small datasets (a few tens of points), the first digit of - the statistics is often affected by the uncertainty. - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -In such cases, ranking with the second or third digit of a statistics has - no sense, unless correlations are taken into account. - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -It is practically impossible to predict the dataset size required for a - stable and robust ranking. - Many factors other than set size are involved, notably the number and nature - of methods to be ranked. - When a lot of DFAs are compared, a hierarchical ranking is often performed, - for instance by first choosing the best method at each rung of the Jacob's - ladder, and then comparing these methods together -\begin_inset CommandInset citation -LatexCommand cite -key "Wu2015b" -literal "false" - -\end_inset - -. - This is one way to avoid the ranking uncertainty resulting from the direct - comparison of many methods. -\end_layout - -\begin_layout Standard -We can only encourage benchmark authors to provide adequate uncertainty - estimations and ranking probability matrices, which can be obtained with - a negligible overcharge in computer time. 
- -\end_layout - -\begin_layout Subsection -The correlation matrix as a sanity check -\end_layout - -\begin_layout Standard -When we started this study, the correlation matrices were mainly intended - to illustrate the importance of considering them when comparing statistics. - When cumulating the case studies, it appeared that they contain a lot of - information relevant to the quality of the benchmark dataset. - To our knowledge, this subject has not previously been discussed, and benchmark -ing studies do not report such correlation matrices. -\end_layout - -\begin_layout Standard -Considering that model errors in computational chemistry are mostly systematic, - one expects that error patterns over a dataset are characteristic of each - method or family of methods. - This seems to be a basic requirement for sound benchmarking studies. - Considering the errors correlation matrix, the guiding line is thus that - closely related methods should produce similar error patterns and have - strongly correlated error sets, the correlation level decreasing with a - -\begin_inset Quotes eld -\end_inset - -distance -\begin_inset Quotes erd -\end_inset - - between methods. - This is clearly illustrated in the case BOR2019, where the correlation - matrix clusters nicely into relevant DFA groups. - There also seems to be a clean decorrelation between MP2 or MP2-based methods - and DFAs (NAR2019, WU2015). - Similarly, one observes no correlation between HF, MP2 and a machine-learning - method calibrated on CCSD(T) in case ZAS2019. -\end_layout - -\begin_layout Standard - -\emph on -A contrario -\emph default -, when the methods set contains unrelated methods, a uniform strongly positive - correlation matrix should raise an alert. - We have seen in cases DAS2019 and THA2015 that outliers and/or large reference - data errors could dominate the correlation matrix and influence the benchmark - statistics.
- If the ranking study is to reflect the methods performances, the curation - and possible pruning of the dataset is a necessary preliminary step. - Otherwise, more complex statistical models have to be used to alleviate - the impact of those points (see Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Estimation-of-the" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - and several references -\begin_inset CommandInset citation -LatexCommand cite -key "Lejaeghere2014,Pernot2015,Proppe2017" -literal "false" - -\end_inset - -). -\end_layout - -\begin_layout Standard -Note that strongly correlated error sets do not imply similar performances. - For instance a set of linearly scaled harmonic vibrational frequencies - typically has better statistics than the unscaled set, whereas their correlatio -n coefficient is 1 because of the linear transformation. - -\end_layout - -\begin_layout Subsection -Impact of error sets correlation on ranking -\end_layout - -\begin_layout Standard -The correlation between error sets is partially or totally transferred to - statistics. - Except for linear transformations of the errors, where the transfer is - total, including the sign, one uses Monte Carlo methods to estimate this - transfer. - In many cases, such as for normal, student's or g-and-h error distributions -\begin_inset CommandInset citation -LatexCommand cite -key "Hoaglin1985" -literal "false" - -\end_inset - -, one observes that the correlation intensity mainly decreases when passing - from errors to MUE to -\begin_inset Formula $Q_{95}$ -\end_inset - -. - The case studies above show however that there are exceptions to this basic - trend. - We cannot presently rationalize the observed exceptions, but the main conclusio -n is that in most cases, one should not ignore correlations when comparing - statistics. 
- -\end_layout - -\begin_layout Standard -However, unlike what was shown above for the error correlations, the visualization - of correlations between statistics might be of secondary interest. - In fact, the paired samples bootstrap algorithms used in this study make - it possible to account directly for these correlations without having to estimate them. - -\end_layout - -\begin_layout Standard -In a vast majority of the cases studied above, the correlation matrices - for MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - have positive coefficients. - These contribute to a reduction of the uncertainty on statistics differences, - with better discernibility between uncertain statistics. - Globally, positive correlations increase the robustness of rankings. - -\end_layout - -\begin_layout Subsection -Systematic improvement analysis -\end_layout - -\begin_layout Standard -We introduced a new criterion, the systematic improvement probability, which - has the major advantage of being independent of the usual descriptive statistics. - It simply counts the signs of the differences of absolute error pairs. - It is a useful complement to the MUE, as it makes it possible to analyze a MUE difference. - All the case studies show that a decrease of MUE results from the balance - between gains and losses. - With the exception of one pair of methods in CAL2019, we did not find a 'best - method' which improves the results of lower rank methods for the full benchmark - dataset. - A point we wish to make here is that -\emph on -even physics-based improvements in DFAs do not lead to systematic improvements - for all systems -\emph default -. - We have seen for instance that for band gaps, mBJ degrades LDA predictions - for 16 -\begin_inset space \thinspace{} -\end_inset - -% of the systems (BOR2019). - In fact, there is often a non-negligible percentage of systems for which - the 'bad' method is better than the 'good' one, all across Jacob's ladder.
- As long as the performances of computational chemistry methods rely on - error cancellations, physics-based improvements of DFAs can be seen as - a kind of statistical correction. -\end_layout - -\begin_layout Subsection -MSIP ranking vs MUE -\end_layout - -\begin_layout Standard - -\color teal -The mean systematic improvement probability (MSIP) was tested here as a - ranking statistic. - Its main advantage is its independence from the usual summary statistics; - its main drawback is that it depends on the set of methods being compared - and it is not transferable to comparisons outside its definition set. - In the various cases treated above, the rankings provided by the MSIP - most often conform to the MUE rankings and are as sensitive as the other - rankings to sampling uncertainty. - A ranking conflict with the MUE was observed when some error sets presented - heavy tails, such as in case PER2018. - However, such cases can readily be identified by other means ( -\emph on -e.g. -\emph default -, ECDFs of absolute errors), reducing the interest of the MSIP as a ranking - score. -\end_layout - -\begin_layout Section -Conclusion -\end_layout - -\begin_layout Standard -In this article, we proposed several tools to test the robustness of rankings - or comparisons of methods based on error statistics for non-exhaustive, - limited size datasets. - In order to avoid a normality hypothesis on the errors distributions, bootstrap --based methods were adopted, as suggested by Proppe and Reiher -\begin_inset CommandInset citation -LatexCommand cite -key "Proppe2017" -literal "false" - -\end_inset - - for the estimation of prediction uncertainty of DFT methods. - Our target statistics were the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -, but these tools are straightforwardly applicable to other statistics.
-\end_layout - -\begin_layout Standard -Before any ranking, we have seen that the error sets correlation matrix - can be useful to appreciate the quality of a benchmark dataset. - Then, the ranking probability matrix -\begin_inset Formula $\mathbf{P}_{r}$ -\end_inset - - for a chosen statistic provides a clear diagnostic on the robustness of - the corresponding ranking. - The impact of dataset size and number of compared methods can be thoroughly - tested. -\end_layout - -\begin_layout Standard -When considering pairs of methods, we generalized our previous definition - of the inversion probability -\begin_inset Formula $P_{inv}$ -\end_inset - - to account for correlations between statistics. - We also introduced the systematic improvement probability (SIP) which is - independent of other descriptive statistics. - We have seen that the use of MUE for ranking hides a complex interplay - between the genuine method improvement and the error cancellations inherent - to most computational chemistry methods. - In particular, we have shown how a difference in MUE is a balance between - gains and losses in absolute errors. - Estimation of the systematic improvement probability (SIP) and the mean - gain (MG) and loss (ML) statistics can help to understand this balance and - to assess the risks of switching between two methods. - None of the showcased examples revealed a method which provides a full - systematic improvement over its competitors, even when comparing an elaborate - composite method such as G4MP2 to DFAs. - The pedagogical virtue of the SIP is to clearly show that computational - chemistry is a science of compromises. - -\end_layout - -\begin_layout Standard -We considered here only homogeneous datasets. - Many modern benchmarks are based on composite datasets, involving weighting - schemes to incorporate data with different units -\begin_inset CommandInset citation -LatexCommand cite -key "Goerigk2017" -literal "false" - -\end_inset - -.
- The applicability of the SIP to such datasets is direct, but the mean gain - and mean loss statistics should become multivariate. - For the estimation of -\begin_inset Formula $P_{inv}$ -\end_inset - - and ranking probability matrices for composite statistics ( -\emph on -e.g. -\emph default -, WTMAD -\begin_inset CommandInset citation -LatexCommand cite -key "Goerigk2017" -literal "false" - -\end_inset - -), adaptation of the paired-sample bootstrap is straightforward, although - care should be taken to avoid imbalance between the various components - of the dataset by using stratified bootstrap -\begin_inset CommandInset citation -LatexCommand cite -key "Hesterberg2015" -literal "false" - -\end_inset - -. -\end_layout - -\begin_layout Standard -Finally, we considered here for simplicity raw error sets, from which no - care has been taken to remove systematic trends. - When this is possible, such trend corrections, often simply linear, will - provide much better generalizability of the summary statistics derived - from these error sets. - Besides, this is a necessary step if one wishes to estimate the prediction - uncertainty of any method -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,Proppe2017" -literal "false" - -\end_inset - -, notably when dealing with non-uniform reference data uncertainties. -\end_layout - -\begin_layout Section* - -\color teal -Acknowledgment -\end_layout - -\begin_layout Standard - -\color teal -The authors are grateful to Pr. - O. -\begin_inset space \thinspace{} -\end_inset - -A. -\begin_inset space \thinspace{} -\end_inset - -von -\begin_inset space ~ -\end_inset - -Lilienfeld for providing the datasets of case ZAS2019, and to Pr. - S. -\begin_inset space \thinspace{} -\end_inset - -Grimme for providing a corrected copy of Supplementary Information for case - CAL2019. 
-\end_layout - -\begin_layout Section* - -\color teal -Supplementary Information -\end_layout - -\begin_layout Standard - -\color teal -Datasets and R code to reproduce the results of the article. -\end_layout - -\begin_layout Standard -\begin_inset CommandInset bibtex -LatexCommand bibtex -btprint "btPrintCited" -bibfiles "packages,NN" -options "bibtotoc,unsrturl" - -\end_inset - - -\end_layout - -\begin_layout Standard -\start_of_appendix -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -appendixpage -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Section -Estimation of the mean value and its uncertainty -\begin_inset CommandInset label -LatexCommand label -name "sec:Estimation-of-the" - -\end_inset - - -\end_layout - -\begin_layout Standard -Let us consider the mean (signed) value of the errors (MSE). - In absence of uncertainty, it is defined as -\begin_inset Formula -\begin{equation} -\overline{e}=\frac{1}{N}\sum_{i=1}^{N}e_{i} -\end{equation} - -\end_inset - -and its uncertainty (standard error) is estimated as -\begin_inset Formula -\begin{equation} -u(\overline{e})=\sqrt{\frac{s_{e}^{2}}{N}}\label{eq:uref} -\end{equation} - -\end_inset - -where -\begin_inset Formula $s_{e}^{2}$ -\end_inset - - is a sample-based estimator of the population variance -\begin_inset Formula -\begin{equation} -s_{e}^{2}=\frac{1}{N-1}\sum_{i=1}^{N}(e_{i}-\overline{e})^{2} -\end{equation} - -\end_inset - -Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:uref" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - gives the well-known dependence of the MSE uncertainty with the dataset - size for independent and identically distributed ( -\emph on -i.i.d.) -\emph default -errors, assuming a finite variance, which might exclude error sets with - heavy-tailed distributions, -\emph on -e.g. -\emph default -, Cauchy. 
- -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -Note that -\begin_inset Formula $u(\overline{e})$ -\end_inset - - in Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:uref" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - does not account for the uncertainty on -\begin_inset Formula $s_{e}$ -\end_inset - -. - Taking this factor into account leads to a larger uncertainty, which can - be estimated as -\begin_inset CommandInset citation -LatexCommand cite -key "Kacker2003" -literal "false" - -\end_inset - - -\begin_inset Formula -\begin{equation} -u(\overline{e})=\sqrt{\frac{N-1}{N-3}\frac{s_{e}^{2}}{N}} -\end{equation} - -\end_inset - -This formula is based on the properties of the Student- -\emph on -t -\emph default - distribution -\begin_inset CommandInset citation -LatexCommand cite -key "Evans2000" -literal "false" - -\end_inset - -. - The impact of the correction factor is notable only for very small datasets - (smaller than 3 -\begin_inset space \thinspace{} -\end_inset - -% for -\begin_inset Formula $N\ge30$ -\end_inset - -), and we will consider the standard formula . -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -If uncertainty on errors -\begin_inset Formula $u(e_{i})$ -\end_inset - - is negligible, -\begin_inset Formula $s_{e}$ -\end_inset - - is an estimation of the standard deviation of the errors distribution -\begin_inset Formula $\sigma$ -\end_inset - -, which represents the dispersion of model errors. - If the reference data are uncertain, -\begin_inset Formula $s_{e}$ -\end_inset - - quantifies a dispersion due to both model errors and reference data uncertainty. 
- In consequence, it overestimates the dispersion of model errors, and specific - models have to be designed if one wishes to estimate this specific contribution - -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,Proppe2017" -literal "false" - -\end_inset - -. - This points to the necessity of using accurate reference data if the benchmark - based on standard statistics is to reflect the properties of the studied - methods. -\end_layout - -\begin_layout Standard -To be more specific, in the presence of uncertainty on errors, the weighted - mean is the maximum likelihood estimator of the distribution mean under - normality assumptions -\begin_inset CommandInset citation -LatexCommand cite -key "Bevington1992" -literal "false" - -\end_inset - - -\begin_inset Formula -\begin{align} -\overline{e} & =\sum_{i=1}^{N}w_{i}e_{i}\\ -w_{i} & =\frac{u(e_{i})^{-2}}{\sum_{j=1}^{N}u(e_{j})^{-2}}\label{eq:wRefUnc} -\end{align} - -\end_inset - -giving less weight to the more uncertain data. - Direct application of the combination of variances to this expression leads - to -\begin_inset CommandInset citation -LatexCommand cite -key "Bevington1992" -literal "false" - -\end_inset - - -\begin_inset Formula -\begin{equation} -u(\overline{e})^{2}=\frac{1}{\sum_{j=1}^{N}u(e_{j})^{-2}} -\end{equation} - -\end_inset - -Note that in the case of identical uncertainty for all data, one recovers - the expression for the unweighted case (Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:uref" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). 
- -\end_layout - -\begin_layout Standard -The validity of this estimation has to be tested by computing the weighted - chi-squared -\begin_inset Formula -\begin{equation} -\chi_{w}^{2}=\sum_{i}\frac{(e_{i}-\overline{e})^{2}}{u(e_{i})^{2}}\label{eq:Birge} -\end{equation} - -\end_inset - -If the errors on the reference data are assumed to be normally distributed, - -\begin_inset Formula $\chi_{w}^{2}$ -\end_inset - - has a chi-squared distribution with -\begin_inset Formula $N-1$ -\end_inset - - degrees of freedom ( -\begin_inset Formula $\chi_{N-1}^{2}$ -\end_inset - -). - -\begin_inset Formula $\chi_{w}^{2}$ -\end_inset - - should be close to the mean of this distribution, -\begin_inset Formula $N-1$ -\end_inset - -, and lie within its 95 -\begin_inset space \thinspace{} -\end_inset - -% high probability interval. - If -\begin_inset Formula $\chi_{w}^{2}$ -\end_inset - - is too small, the -\begin_inset Formula $u(e_{i})$ -\end_inset - - are over-estimated and should be reconsidered, or the benchmarked method - is over-fitting the data, which is unlikely, unless the method is parametric - and has been calibrated on this same dataset. - If -\begin_inset Formula $\chi_{w}^{2}$ -\end_inset - - is too large, there is an excess of variance in the -\begin_inset Formula $E_{M}$ -\end_inset - - error set -\begin_inset CommandInset citation -LatexCommand cite -key "Kacker2004,Rukhin2009,Rivier2014" -literal "false" - -\end_inset - -. - In the typical benchmarking of computational chemistry methods, this is - generally the case because of the extraneous dispersion due to model errors. 
- To ensure the statistical validity of the weighted mean and its uncertainty, - one has therefore to define a more complex error model, considering explicitly - the two sources of dispersion, and to redefine the weights, accounting - for the excess of variance and possible biases in the error sets -\begin_inset CommandInset citation -LatexCommand cite -key "Lejaeghere2014,Lejaeghere2014a,Pernot2015,DeWaele2016,Proppe2017" -literal "false" - -\end_inset - -. - -\end_layout - -\begin_layout Standard -Considering -\begin_inset Formula $\sigma$ -\end_inset - - as the dispersion of model errors, one can stipulate that the dispersion - of the errors is the combined effect of model error and reference data - uncertainty and redefine the weights as -\begin_inset CommandInset citation -LatexCommand cite -key "Rukhin2009" -literal "false" - -\end_inset - - -\begin_inset Formula -\begin{equation} -w_{i}=\frac{\left(\sigma^{2}+u(e_{i})^{2}\right)^{-1}}{\sum_{j=1}^{N}\left(\sigma^{2}+u(e_{j})^{2}\right)^{-1}}\label{eq:weights-IRWLS} -\end{equation} - -\end_inset - -with which -\begin_inset Formula -\begin{equation} -u(\overline{e})^{2}=\frac{1}{\sum_{j=1}^{N}\left(\sigma^{2}+u(e_{j})^{2}\right)^{-1}}\label{eq:uwmean} -\end{equation} - -\end_inset - -converges properly to the standard limit when the reference data errors - become negligible before the model errors. 
- The model error variance -\begin_inset Formula $\sigma^{2}$ -\end_inset - - can be estimated by decomposing the total variance of the errors into the - variance of model errors plus the mean variance of the data (known as Cochran's - ANOVA estimate -\color orange - -\begin_inset CommandInset citation -LatexCommand cite -key "Kacker2004,Rivier2014" -literal "false" - -\end_inset - - -\color inherit -) -\begin_inset Formula -\begin{equation} -\mathrm{var}(e)=\sigma^{2}+\frac{1}{N}\sum_{j=1}^{N}u(e_{j})^{2}\label{eq:dispmod} -\end{equation} - -\end_inset - -This variance analysis ensures that -\begin_inset Formula $\chi_{w}^{2}$ -\end_inset - - is correct. - Note that other reweighting schemes exist -\color orange - -\begin_inset CommandInset citation -LatexCommand cite -key "Kacker2004,Rivier2014" -literal "false" - -\end_inset - - -\color inherit -, but Cochran's is the simplest. - Besides, all reweighting methods are iterative: -\begin_inset Formula $\sigma$ -\end_inset - - depends on -\begin_inset Formula $\overline{e}$ -\end_inset - -, which itself depends on -\begin_inset Formula $\sigma$ -\end_inset - -. -\end_layout - -\begin_layout Standard -If the dispersion of reference data uncertainties is small, -\emph on -i.e. -\emph default -, smaller than the model errors contribution, one can reasonably consider - that the weights are identical and that the unweighted mean can be used. - Formally, its uncertainty (Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:uwmean" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) depends on -\begin_inset Formula $\sigma$ -\end_inset - -, which can be directly estimated through Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:dispmod" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, but by construction, one will recover results given by Eq. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:uref" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - -\end_layout - -\begin_layout Standard -One will therefore consider that, unless a large dispersion of reference - data uncertainty is observed, these uncertainties can be ignored in the - estimation of the mean and its standard error. - Otherwise, one should use the weighted mean with the standard uncertainty - estimate. -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -Note that the dispersion of model errors -\begin_inset Formula $\sigma$ -\end_inset - - is related to the model prediction uncertainty and is a score of interest - for the ranking of models -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,Pernot2018" -literal "false" - -\end_inset - -. - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -An advanced modeling of uncertainty sources is crucial if one wishes a reliable - estimate of the MSE, and of the various uncertainty contributions -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015" -literal "false" - -\end_inset - -. - In standard benchmarking, the aim is mostly to compare methods, knowing - that the reference datasets are incomplete. - If reference data uncertainty plays a significant role – that would be - the case if data with very different uncertainty levels were aggregated - in the dataset – one might assume that its impact will be the same for - all methods to be compared. - The values of the dispersion statistics will be consistently overestimated - for all methods. 
- As long as one is not interested in the accurate estimation of the underlying - properties of the error distributions, such as the model prediction uncertainty - -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,Proppe2017" -literal "false" - -\end_inset - -, it is simpler to rely on unweighted schemes and properly curated datasets. -\end_layout - -\begin_layout Section -Numerical study of the covariance of nonlinear statistics -\begin_inset CommandInset label -LatexCommand label -name "sec:Covariance-of-scores" - -\end_inset - - -\end_layout - -\begin_layout Standard -To illustrate the transfer of covariance from the errors sets to their statistic -s, one generates random samples -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - from a bivariate distribution with prescribed correlation coefficient -\begin_inset Formula $\rho$ -\end_inset - -. - For each random sample, the statistics values -\begin_inset Formula $S_{1}$ -\end_inset - - and -\begin_inset Formula $S_{2}$ -\end_inset - - are calculated, and -\begin_inset Formula $\mathrm{cov}(s_{1},s_{2})$ -\end_inset - - is estimated from the statistics samples. - The error sets correlation coefficient -\begin_inset Formula $\rho$ -\end_inset - - has been varied between -1 and 1, and the corresponding correlation coefficient -s have been estimated for the MSE, MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - statistics. - The dataset size is -\begin_inset Formula $N=100$ -\end_inset - - and the correlation coefficient statistics are based on -\begin_inset Formula $10^{3}$ -\end_inset - - Monte Carlo samples.
- -\end_layout - -\begin_layout Standard -The results for the four cases of the g-and-h distribution (Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:The-g-and-h-distribution" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) of error sets are reported in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:corrScore" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a-d). - In this example, both error sets -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - have the same distribution with unit variance, only their correlation varies. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/AppB_corrScores_g_0h_0.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/AppB_corrScores_g_0h_0.2.png - lyxscale 25 - width 32text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/AppB_corrScores_g_0.2h_0.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/AppB_corrScores_g_0.2h_0.2.png - lyxscale 25 - width 32text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/AppB_corrScoresNormDec.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/AppB_corrScoresStudDec.png - lyxscale 25 - width 32text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name 
"fig:corrScore" - -\end_inset - - Correlation coefficients (CC) of several statistics/scores as a function - of the CC of the samples used to estimate them. - The error bars represent 95 -\begin_inset space \thinspace{} -\end_inset - -% intervals for sampling errors. - Four cases of the g-and-h distribution are considered for the error sets: - (a) normal ( -\begin_inset Formula $g=h=0$ -\end_inset - -); (b) heavy-tailed symmetric ( -\begin_inset Formula $g=0;\thinspace h=0.2$ -\end_inset - -); (c) light-tailed asymmetric ( -\begin_inset Formula $g=0.2;\thinspace h=0$ -\end_inset - -); (d) heavy-tailed asymmetric ( -\begin_inset Formula $g=h=0.2$ -\end_inset - -). - Additional cases with shifted distributions, -\begin_inset Formula $\mu$ -\end_inset - -= (-0.2,0.5) : (e) normal ; (f) Student's- -\begin_inset Formula $t$ -\end_inset - - ( -\begin_inset Formula $\nu$ -\end_inset - -= 5). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -These simulations confirm the identity for the MSE, independently of the - underlying distribution. - The correlation coefficients for the other statistics are positive (within - numerical uncertainty) and systematically smaller than -\begin_inset Formula $|\rho|$ -\end_inset - -. - They are symmetrical with respect to -\begin_inset Formula $\rho=0$ -\end_inset - - for symmetrical error distributions. - The values for the MUE are consistently larger than, or equal to, the values - for -\begin_inset Formula $Q_{95}$ -\end_inset - -. - In all cases, the correlation coefficient for the MUE is very close to - -\begin_inset Formula $\rho^{2}$ -\end_inset - -. - For negative values of -\begin_inset Formula $\rho$ -\end_inset - -, the correlation coefficient of -\begin_inset Formula $Q_{95}$ -\end_inset - - is sensitive to the asymmetry of the errors distribution.
-\end_layout - -\begin_layout Standard -The same procedure has been applied to shifted means ( -\begin_inset Formula $\overline{e}_{1}=-0.2$ -\end_inset - -, -\begin_inset Formula $\overline{e}_{2}=-0.5$ -\end_inset - -) for normal and Student's distribution with 5 degrees of freedom (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:corrScore" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(e,f)). - For the normal distribution the symmetry observed above is broken, as well - as the pure quadratic trend for the MUE. - For the Student's distribution, the correlations lie above a positive threshold. - -\end_layout - -\begin_layout Standard -Simulation of correlated error samples enabled us to illustrate properties - of correlation transfer to statistics: identical correlation for the MSE, - and smaller positive correlations for the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -. - As we covered only a limited set of scenarii, these features cannot be - considered as universal. - Indeed, the case studies in Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Examples" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - reveal some exceptions. -\end_layout - -\begin_layout Section -Probabilities of Type I errors for the comparison of MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - pairs -\begin_inset CommandInset label -LatexCommand label -name "sec:Type-I-error" - -\end_inset - - -\end_layout - -\begin_layout Standard -We applied the procedure followed by Wilcox and Erceg-Hurn -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - - to estimate the probability of type I errors for the comparison of quantiles - with their method M (Algo.
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:methodM" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). -\end_layout - -\begin_layout Standard -A false positive (type I error) is obtained when a true null hypothesis - is rejected by the test. - In the present context, one draws two samples -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - of size -\begin_inset Formula $N$ -\end_inset - - from the same distribution and compute -\begin_inset Formula $p_{g}$ -\end_inset - - for the comparison of the values of a statistic S, -\begin_inset Formula $s_{1}$ -\end_inset - - and -\begin_inset Formula $s_{2}$ -\end_inset - -, respectively. - A value of -\begin_inset Formula $p_{g}<0.05$ -\end_inset - - leads to a false rejection of the (null) hypothesis -\begin_inset Formula $s_{1}=s_{2}$ -\end_inset - -. - The process is repeated -\begin_inset Formula $M$ -\end_inset - - times, and the proportion of false rejections provides an estimation of - the probability -\begin_inset Formula $\hat{\alpha}$ -\end_inset - - of type I errors. -\end_layout - -\begin_layout Standard - -\color teal -For their tests, Wilcox and Erceg-Hurn use the g-and-h distribution (Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:The-g-and-h-distribution" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) to generate the data samples. - Two levels of correlation between -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - were also considered in the original study ( -\begin_inset Formula $\rho=0$ -\end_inset - - or -\begin_inset Formula $0.7$ -\end_inset - -). 
- These tests aim at determining the sample size -\begin_inset Formula $N$ -\end_inset - - required to reach a probability of type I errors -\begin_inset Formula $\hat{\alpha}$ -\end_inset - - close to the statistical testing threshold. - A recommendation in the original study is that -\begin_inset Formula $\hat{\alpha}$ -\end_inset - - should not exceed -\begin_inset Formula $0.075$ -\end_inset - - for a test at the 0.05 level -\begin_inset CommandInset citation -LatexCommand cite -key "Bradley1978" -literal "false" - -\end_inset - -. -\end_layout - -\begin_layout Standard - -\color teal -As these test cases did not include our conditions of interest in terms - of correlation and quantile level, we performed new simulations, -\color inherit - using functions provided in -\family typewriter -R -\family default - packages -\family typewriter -WRS -\family default - -\begin_inset CommandInset citation -LatexCommand cite -key "R-WRS" -literal "false" - -\end_inset - - and -\family typewriter -WRS2 -\family default - -\begin_inset CommandInset citation -LatexCommand cite -key "R-WRS2" -literal "false" - -\end_inset - -, after assessing the reproducibility of the original results. - We kept the same generative distribution and scenarii for -\begin_inset Formula $g$ -\end_inset - - and -\begin_inset Formula $h$ -\end_inset - - parameters, and we extended the exploration for dataset size from -\begin_inset Formula $N=20$ -\end_inset - - to 70, and correlation coefficient -\begin_inset Formula $\rho=0,\thinspace0.5,\thinspace0.9$ -\end_inset - -, more representative of the conditions of interest in the present study. - For compatibility with the original study, the number of replications was - kept to -\begin_inset Formula $M=2000$ -\end_inset - -, and the number of bootstrap samples to -\begin_inset Formula $B=1000$ -\end_inset - -. - The results are summarized in Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:power1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/AppC_Alpha_MUE.png - lyxscale 25 - width 49text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/AppC_Alpha_Q95.png - lyxscale 25 - width 49text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:power1" - -\end_inset - - Probability of type I errors -\begin_inset Formula $\hat{\alpha}$ -\end_inset - - for the MUE (left) and -\begin_inset Formula $Q_{95}$ -\end_inset - - (right), as a function of dataset size -\begin_inset Formula $N$ -\end_inset - -. - Each graph corresponds to a type of g-and-h distribution for the data samples - (see text for details). - The points and lines correspond to a value of the datasets correlation - coefficient -\begin_inset Formula $\rho$ -\end_inset - -. 
- -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -For the MUE, the safety region ( -\begin_inset Formula $\hat{\alpha}\le0.075$ -\end_inset - -; black dashed line) -\family roman -\series medium -\shape up -\size normal -\emph off -\bar no -\strikeout off -\xout off -\uuline off -\uwave off -\noun off -\color none - -\begin_inset CommandInset citation -LatexCommand cite -key "Bradley1978" -literal "false" - -\end_inset - - -\family default -\series default -\shape default -\size default -\emph default -\bar default -\strikeout default -\xout default -\uuline default -\uwave default -\noun default -\color inherit - is reached in all cases for -\begin_inset Formula $N>30$ -\end_inset - -, and then all values are close to the nominal value (0.05). - There is no remarkable trend with respect to the type of g-and-h distribution, - nor the correlation coefficient. - We have estimated previously -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - - that the MUE is typically located between the 0.5 and 0.75 quantiles, for - which Wilcox and Erceg-Hurn -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - - have concluded that the minimal request is -\begin_inset Formula $N\ge30$ -\end_inset - -, which we confirm. -\end_layout - -\begin_layout Standard -For -\begin_inset Formula $Q_{95}$ -\end_inset - -, one sees that for -\begin_inset Formula $N=40$ -\end_inset - -, the situation is more favorable for the normal distribution, but in all - cases, the recommended limit is reached globally for -\begin_inset Formula $N\ge60$ -\end_inset - -. 
- Strong correlation coefficients ( -\begin_inset Formula $\rho=0.9$ -\end_inset - -) seem also to be almost systematically more favorable, and there is a slight - deleterious effect below -\begin_inset Formula $N=50$ -\end_inset - - for heavy-tailed distributions ( -\begin_inset Formula $h=0.2$ -\end_inset - -). - Nevertheless, even for -\begin_inset Formula $N=40$ -\end_inset - -, -\family roman -\series medium -\shape up -\size normal -\emph off -\bar no -\strikeout off -\xout off -\uuline off -\uwave off -\noun off -\color none - -\begin_inset Formula $\hat{\alpha}$ -\end_inset - - does not exceed notably the 10 -\begin_inset space \thinspace{} -\end_inset - -% type I error probability (red dashed line). -\end_layout - -\begin_layout Paragraph -Remark. -\end_layout - -\begin_layout Standard -Establishing the power of the test ( -\begin_inset Formula $1-\beta$ -\end_inset - -), where -\begin_inset Formula $\beta$ -\end_inset - - is the probability of type II errors (false negative, or the non-rejection - of a false null hypothesis) is practically impossible unless one defines - a specific alternative hypothesis. - In the present case, there is an infinity of ways to realize the -\begin_inset Formula $s_{1}\ne s_{2}$ -\end_inset - - alternative, so the power estimation is intractable. - -\end_layout - -\begin_layout Section -Numerical study of the Harrell and Davis algorithm -\begin_inset CommandInset label -LatexCommand label -name "sec:Simulated-example" - -\end_inset - - -\end_layout - -\begin_layout Standard -This example is intended to outline the advantages of Harrell and Davis - (HD) algorithm for quantiles estimation, notably when associated with bootstrap - sampling.
- One considers the values -\begin_inset Formula $s_{1}$ -\end_inset - - and -\begin_inset Formula $s_{2}$ -\end_inset - - of a statistic -\begin_inset Formula $S$ -\end_inset - - on two datasets -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - -, which are drawn from a bivariate normal distribution -\begin_inset Formula -\begin{equation} -(E_{1},E_{2})\sim\mathcal{N}\left(\boldsymbol{\mu}=(\mu_{1},\mu_{2}),\boldsymbol{\Sigma}=\left(\begin{array}{cc} -\sigma_{1}^{2} & \rho\sigma_{1}\sigma_{2}\\ -\rho\sigma_{1}\sigma_{2} & \sigma_{2}^{2} -\end{array}\right)\right)\label{eq:bivnorm} -\end{equation} - -\end_inset - -where the error samples have different means -\begin_inset Formula $(\mu_{1},\mu_{2})$ -\end_inset - - and variances -\begin_inset Formula $(\sigma_{1}^{2},\sigma_{2}^{2})$ -\end_inset - -, and -\begin_inset Formula $\mathrm{cov}(E_{1},E_{2})=\rho\sigma_{1}\sigma_{2}$ -\end_inset - -. - The values of the parameters for the simulations and the corresponding - statistics are given in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:Exact-values" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - The values for the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - are obtained as described in Ref. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - -. - Those values are fairly representative of some problems treated in the - case studies. 
- -\begin_inset Float table -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center -\begin_inset Tabular - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Set -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -RMSD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $E_{1}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.1 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.88 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.16 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $E_{2}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.1 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.80 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.97 -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand 
label -name "tab:Exact-values" - -\end_inset - -Reference values for the univariate statistics of datasets -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - described by Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:bivnorm" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, for -\begin_inset Formula $\mu_{1}=0$ -\end_inset - -, -\begin_inset Formula $\mu_{2}=0.1$ -\end_inset - -, -\begin_inset Formula $\sigma_{1}=1.1$ -\end_inset - - and -\begin_inset Formula $\sigma_{2}=1.0$ -\end_inset - -. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -Comparison to quantiles estimated with -\begin_inset Formula $\hat{Q}_{7}$ -\end_inset - - -\begin_inset CommandInset label -LatexCommand label -name "subsec:Quantiles-estimation-by" - -\end_inset - - -\end_layout - -\begin_layout Standard -The numerical case is based on the distribution for -\begin_inset Formula $E_{2}$ -\end_inset - -, as presented above. - In a first test, -\begin_inset Formula $E_{2}$ -\end_inset - - sets of increasing sizes between 20 and 500 were generated by random sampling - from the corresponding normal distribution, and -\begin_inset Formula $Q_{95}$ -\end_inset - - was estimated by two algorithms: the HD algorithm and the -\begin_inset Formula $\hat{Q}_{7}$ -\end_inset - - method of Hyndman and Fan -\begin_inset CommandInset citation -LatexCommand cite -key "Hyndman1996" -literal "false" - -\end_inset - -, which is the default algorithm in the -\family typewriter -quantile() -\family default - function of -\family typewriter -R -\family default - -\begin_inset CommandInset citation -LatexCommand cite -key "CiteR" -literal "false" - -\end_inset - -.
- -\begin_inset Formula $\hat{Q}_{7}$ -\end_inset - - is one of a family of quantile estimators based on the linear combination - of one or two order statistics -\begin_inset CommandInset citation -LatexCommand cite -key "Hyndman1996" -literal "false" - -\end_inset - -, whereas the HD algorithm is based on the linear combination of all order - statistics for a sample -\begin_inset CommandInset citation -LatexCommand cite -key "Harrell1982" -literal "false" - -\end_inset - -. - It is therefore more computationally demanding and is not proposed in the - base R quantile function options. -\end_layout - -\begin_layout Standard -The above procedure is repeated 10000 times, and the distributions of -\begin_inset Formula $Q_{95}$ -\end_inset - - values are summarized by a set of five quantiles (0.05, 0.25, 0.5, 0.75, 0.95). - The results are presented in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:simul1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - This simulation shows that the HD quantiles converge faster than the reference - one, with less bias for small samples ( -\begin_inset Formula $N<100$ -\end_inset - -). 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/AppD_Compare_Q95.png - lyxscale 30 - width 65col% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:simul1" - -\end_inset - -Comparison of -\begin_inset Formula $Q_{95}$ -\end_inset - - estimation algorithms, -\begin_inset Formula $\hat{Q}_{7}$ -\end_inset - - and HD: (a) Monte Carlo sampling; (b) bootstrap sampling; (c) bootstrap - sample histogram for -\begin_inset Formula $N=100$ -\end_inset - -; (d) idem for -\begin_inset Formula $N=400$ -\end_inset - -. - The thicker bars in (a,b) represent 25-75 -\begin_inset space \thinspace{} -\end_inset - -% probability intervals and the finer bars 5-95 -\begin_inset space \thinspace{} -\end_inset - -% probability intervals. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -In the second test, a unique -\begin_inset Formula $E_{2}$ -\end_inset - - sample of size -\begin_inset Formula $N=500$ -\end_inset - - is generated, and subsets of increasing size are taken as initial data - for a bootstrap procedure (10000 repeats). - The bootstrap samples are analyzed as above and plotted in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:simul1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b). - The difference between both quantile algorithms is less striking, but the - reference algorithm seems to produce quite asymmetric distributions, where - the median is close to one of the quartiles. - If one looks at the histograms of sampled values for -\begin_inset Formula $N=100$ -\end_inset - - (Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:simul1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(c)), one sees that the HD algorithm produces a much smoother distribution, - whereas -\begin_inset Formula $\hat{Q}_{7}$ -\end_inset - - produces rugged histograms. - The same features are still visible, to a lesser extent, for -\begin_inset Formula $N=400$ -\end_inset - - (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:simul1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(d)). - This feature of the HD method explains its good performances for small - samples, when used in conjunction with the bootstrap -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - -. - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Impact of dataset size on the comparison of statistics -\begin_inset CommandInset label -LatexCommand label -name "subsec:Impact-of-dataset" - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -For the comparison of the means, one can express directly the discrepancy - factor in terms of the datasets properties, -\begin_inset Formula $s_{1,2}=\mu_{1,2}$ -\end_inset - -, -\begin_inset Formula $u(s_{1,2})=\sigma_{1,2}/\sqrt{N}$ -\end_inset - -, -\begin_inset Formula $\mathrm{cov}(s_{1},s_{2})=\mathrm{cov}(E_{1},E_{2})/N=\rho\sigma_{1}\sigma_{2}/N$ -\end_inset - -, as -\begin_inset Formula -\begin{align} -\xi(s_{1},s_{2}) & =\frac{|\mu_{1}-\mu_{2}|}{\sqrt{\frac{1}{N}}\sqrt{\sigma_{1}^{2}+\sigma_{2}^{2}-2\sigma_{1}\sigma_{2}\rho}}\\ - & =\beta\sqrt{N} -\end{align} - -\end_inset - -The number of sample points necessary to reach the discrepancy threshold - -\begin_inset Formula $\kappa$ -\end_inset - - is -\begin_inset Formula -\begin{equation} -N_{t}=\left(\frac{\kappa}{\beta}\right)^{2} -\end{equation} - 
-\end_inset - -and the -\begin_inset Formula $p_{t}$ -\end_inset - - value can be expressed analytically -\begin_inset Formula -\begin{equation} -p_{t}=2*\left(1-\Phi(\beta\sqrt{N})\right) -\end{equation} - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -As a first illustration, let us consider independent samples ( -\begin_inset Formula $\rho=0$ -\end_inset - -). - In this case, the sample size necessary to discriminate the two means is - at least -\begin_inset Formula $N_{t}=855$ -\end_inset - - points. - This is a large value, because the difference in the mean values is much - smaller than the combined standard deviation of the samples. - If the correlation between the datasets is increased to -\begin_inset Formula $\rho=0.9$ -\end_inset - - – a value commonly observed in the real-life datasets – one gets a tenfold - smaller limit sample size, -\begin_inset Formula $N_{t}=89$ -\end_inset - - points. -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -Estimation of -\begin_inset Formula $p$ -\end_inset - --values -\begin_inset CommandInset label -LatexCommand label -name "subsec:Estimation-of--values" - -\end_inset - - -\end_layout - -\begin_layout Standard -The estimation of -\begin_inset Formula $p$ -\end_inset - --values is obtained by Monte Carlo sampling of -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - sets of size -\begin_inset Formula $N$ -\end_inset - - varying between 20 and 500 ( -\begin_inset Formula $\rho=0.9$ -\end_inset - -). - One first checks that the generalized -\begin_inset Formula $p$ -\end_inset - --value -\begin_inset Formula $p_{g}$ -\end_inset - - (Algo. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:methodM" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) is identical to the analytical value of -\begin_inset Formula $p_{t}$ -\end_inset - - for the comparison of mean values (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:scoreBS" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)). - -\end_layout - -\begin_layout Standard -Then, the interest of the Harrell-Davis algorithm for the estimation of - -\begin_inset Formula $p_{g}$ -\end_inset - - values for the comparison of quantiles is shown in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:scoreBS" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b): reaching the 0.05 threshold requires about 250 points for the HD method, - whereas the -\begin_inset Formula $\hat{Q}_{7}$ -\end_inset - - reference quantile algorithm requires about 380 points. - Besides, the HD curve is smoother than the reference one, due to the smoothness - properties of the HD estimator shown above. 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/AppD_scoresBS.png - lyxscale 30 - width 65col% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:scoreBS" - -\end_inset - -Validation of methodological choices for -\begin_inset Formula $p$ -\end_inset - --value estimation: (a) generalized -\begin_inset Formula $p$ -\end_inset - --value -\begin_inset Formula $p_{g}$ -\end_inset - - for the comparison of means (MSE) compared to the analytical result -\begin_inset Formula $p_{t}$ -\end_inset - - ; (b) impact of the quantile estimation algorithms on -\begin_inset Formula $p_{g}$ -\end_inset - - for the comparison of -\begin_inset Formula $Q_{95}$ -\end_inset - -. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Section -The g-and-h distribution -\begin_inset CommandInset label -LatexCommand label -name "sec:The-g-and-h-distribution" - -\end_inset - - -\end_layout - -\begin_layout Standard - -\color teal -The g-and-h distribution -\begin_inset CommandInset citation -LatexCommand cite -key "Hoaglin1985" -literal "false" - -\end_inset - - is used in the following sections to study the impact of the distributions - shapes on several statistics. - -\end_layout - -\begin_layout Standard - -\color teal -If -\begin_inset Formula $z$ -\end_inset - - has a standard normal distribution, its transform -\begin_inset Formula -\begin{equation} -X=\frac{1}{g}(e^{gz}-1)e^{hz^{2}} -\end{equation} - -\end_inset - -has a g-and-h distribution. 
- Its shape is defined by parameters -\begin_inset Formula $g$ -\end_inset - - and -\begin_inset Formula $h$ -\end_inset - -, and contains the normal distribution as a special case ( -\begin_inset Formula $g=h=0$ -\end_inset - -). - Besides the normal, three typical cases are proposed by Wilcox and Erceg-Hurn - -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - -: heavy-tailed symmetric ( -\begin_inset Formula $g=0;\thinspace h=0.2$ -\end_inset - -), light-tailed asymmetric ( -\begin_inset Formula $g=0.2;\thinspace h=0$ -\end_inset - -), and heavy-tailed asymmetric ( -\begin_inset Formula $g=h=0.2$ -\end_inset - -). - These four cases will be considered below. - -\end_layout - -\begin_layout Standard -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Statistical power of -\begin_inset Formula $p_{g}$ -\end_inset - - estimation by bootstrap -\end_layout - -\begin_layout Plain Layout -We now test the quality of -\begin_inset Formula $p_{g}$ -\end_inset - - estimated by the MC-BS method (Algo. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:MC-BS" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) using an hypothetical bivariate normal distribution for -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - (Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:bivnorm" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). -\end_layout - -\begin_layout Plain Layout -This method is applied to the MSE, RMSD, MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - with -\begin_inset Formula $M=B=1000$ -\end_inset - - and -\begin_inset Formula $\rho=0.9$ -\end_inset - -. - The results are plotted in Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:power" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, where the distributions of -\begin_inset Formula $p_{g}$ -\end_inset - - values are summarized by five quantiles (0.05, 0.25, 0.5, 0.75, 0.95). - For the MSE, one sees that the median of the -\begin_inset Formula $p_{g}$ -\end_inset - - values is very close to the theoretical value ( -\emph on -cf. - -\emph default - Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:scoreBS" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), and the 5 percent threshold is reached for -\begin_inset Formula $N\simeq100$ -\end_inset - -. - However the distribution of -\begin_inset Formula $p_{g}$ -\end_inset - - values is such that it would take more than 300 points to reject with high - confidence (95 -\begin_inset space \thinspace{} -\end_inset - -%) the equality of the means. - The figures are about the same for the RMSD. - For the MUE, the median reaches the 5 percent threshold at -\begin_inset Formula $N\simeq130$ -\end_inset - -, but one would need more than 400 points to reject the equality hypothesis. - In this example, even with datasets of 500 points one has still 25 percent - chance to conclude wrongly that both -\begin_inset Formula $Q_{95}$ -\end_inset - - values are equal. 
-\end_layout - -\begin_layout Plain Layout -\begin_inset Float figure -placement t -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename /home/pernot/Bureau/Andreas/1-Paper2019Comp/results/figs/Simul_Power.png - lyxscale 30 - width 99text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:power" - -\end_inset - -Power study of the -\begin_inset Formula $p_{g}$ -\end_inset - - value in the testing of the equality of statistics for two correlated normal - samples -\begin_inset Formula $E_{1}\sim\mathcal{N}(0.0,1.1)$ -\end_inset - -, -\begin_inset Formula $E_{2}\sim\mathcal{N}(0.1,1.0)$ -\end_inset - - and -\begin_inset Formula $\rho=\mathrm{cor}(E_{1},E_{2})=0.9$ -\end_inset - -, and for increasing sample size -\begin_inset Formula $N$ -\end_inset - -. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Following Wilcox -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - -, problems in the comparison of quantiles is largely due to the standard - estimators for quantiles -\begin_inset CommandInset citation -LatexCommand cite -key "Hyndman1996" -literal "false" - -\end_inset - -, which should be replaced by the Harrell-Davis estimator -\begin_inset CommandInset citation -LatexCommand cite -key "Harrell1982" -literal "false" - -\end_inset - -. - Wilcox proposes also to use a generalized -\begin_inset Formula $p$ -\end_inset - --value -\begin_inset CommandInset citation -LatexCommand cite -key "Liu1997" -literal "false" - -\end_inset - - for the comparison of statistics. 
- -\end_layout - -\begin_layout Plain Layout -To appraise the effect of this choice, three methods to estimate the -\begin_inset Formula $p$ -\end_inset - --value for the comparison of quantiles are compared: -\end_layout - -\begin_layout Itemize -Q95 : standard R quantile() function / -\begin_inset Formula $p_{t}$ -\end_inset - - -\end_layout - -\begin_layout Itemize -Q95_HD : Harrell & Davis (HD) quantile estimator -\begin_inset CommandInset citation -LatexCommand cite -key "Harrell1982" -literal "false" - -\end_inset - - / -\begin_inset Formula $p_{t}$ -\end_inset - - -\end_layout - -\begin_layout Itemize -Q95_M -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - -: HD quantile estimator / -\begin_inset Formula $p_{g}$ -\end_inset - - -\begin_inset CommandInset citation -LatexCommand cite -key "Liu1997" -literal "false" - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -The results are reported in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:power" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - First, to assess the impact of the Harrell-Davis method, a Monte Carlo - study was performed to generate sample of p-values from repeated sampling - of a bi-variate normal distribution above, with -\begin_inset Formula $\rho=0.9$ -\end_inset - -. - -\end_layout - -\begin_layout Plain Layout - -\color orange -Theoretical value of difference ? -\begin_inset Formula $s_{1}=1.96$ -\end_inset - -, -\begin_inset Formula $s_{2}=2.36$ -\end_inset - -, -\begin_inset Formula $|s_{1}-s_{2}|=0.4$ -\end_inset - - -\end_layout - -\begin_layout Plain Layout -As seen in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:power" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a), the HD method provides a marginal advantage, -\emph on -i.e. 
-\emph default -, the null hypothesis can be rejected for smaller samples. - The 0.05 threshold is reached for a sample size of about 300 for the HD - method, when the standard method requires a sample size of 400. - -\color orange -Compare to theoretical value. -\end_layout - -\begin_layout Plain Layout -When considering the real issue due to bootstrapping of quantiles, the advantage - of the HD method is more pronounced. - Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:power" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b) shows the statistics for the -\begin_inset Formula $p$ -\end_inset - --values obtained by bootstrapping of a sample, with a Monte Carlo repetition - for a series of samples. - For each method and size, the 0.025, 0.25, 0.5, 0.75 and 0.975 percentiles of - the Monte Carlo samples are shown. - -\end_layout - -\begin_layout Plain Layout -A first point to note is that the median values follow curves which are - in good agreement with the Monte Carlo ones in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:power" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a). - However, the dispersion is quite large, and, even for sample sizes as large - as 1000, the bootstrap has a non-vanishing probability of wrong answers - ( -\emph on -i.e. -\emph default -, -\begin_inset Formula $p>0.05$ -\end_inset - -). - If one looks at the inter-quartile range, one sees that it is always smaller - for the HD method than for the standard one. - In this sense, the Q95_HD method has an improved statistical power over - the Q95 method, but it is only for sample sizes above 700 that the probability - of errors falls below 25 -\begin_inset space \thinspace{} -\end_inset - -%. 
- The impact of the generalized -\begin_inset Formula $p$ -\end_inset - --value (Q95_M method) is small on our test case (BETTER IQR !!!), but it - might become advantageous for non-normal distributions -\begin_inset CommandInset citation -LatexCommand cite -key "Liu1997" -literal "false" - -\end_inset - -. -\begin_inset Foot -status open - -\begin_layout Plain Layout - The Q95_M method is available in -\family typewriter -R -\family default - as the function -\family typewriter -Dqcomhd, -\family default - from package WRS2 -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012,Wilcox2018,R-WRS2" -literal "false" - -\end_inset - -. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout - -\series bold -\color orange -Check normality of stats-difference sets -\end_layout - -\begin_layout Plain Layout - -\color orange -Method M (Algo. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:methodM" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) gives a single -\begin_inset Formula $p_{g}$ -\end_inset - - value, without any estimate of its statistical power, -\emph on -i.e. - -\emph default - the . - -\end_layout - -\begin_layout Plain Layout - -\color orange -Unfortunately, the distributions underlying the error sets are unknown and - typically non-normal. - If one wants to be able to perform a Monte Carlo estimation of -\begin_inset Formula $p_{g}$ -\end_inset - -'s range of variation for a given pair of datasets, one can attempt to model - the underlying bivariate distribution, which might be a difficult problem, - notably for small samples. - A much easier option is to optimize a bivariate normal distribution with - similar statistics. 
- For instance, for comparison of MUEs, one could choose a pair of distributions - with MUEs similar to the reference ones, without caring for other statistics, - such as -\begin_inset Formula $RMSD$ -\end_inset - - or -\begin_inset Formula $Q_{95}$ -\end_inset - -. - One could then perform a MC-BS procedure to assess the distribution of - -\begin_inset Formula $p_{g}$ -\end_inset - -. -\end_layout - -\begin_layout Plain Layout -\begin_inset Float algorithm -wide false -sideways false -status open - -\begin_layout Plain Layout -Input: Two error sets -\begin_inset Formula $E_{1}$ -\end_inset - -, -\begin_inset Formula $E_{2}$ -\end_inset - - of size -\begin_inset Formula $N$ -\end_inset - -, and a statistic estimator -\begin_inset Formula $S$ -\end_inset - - -\end_layout - -\begin_layout Enumerate -Monte Carlo -\end_layout - -\begin_deeper -\begin_layout Enumerate -generate -\begin_inset Formula $M$ -\end_inset - - paired datasets -\begin_inset Formula $(E_{1}^{(i)},E_{2}^{(i)});i=1..M$ -\end_inset - -, by random sampling of their joint distribution (Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:bivnorm-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) -\end_layout - -\end_deeper -\begin_layout Enumerate -Bootstrap -\end_layout - -\begin_deeper -\begin_layout Enumerate -for each dataset pair, a -\begin_inset Formula $p_{g}^{(i)}$ -\end_inset - - value is estimated using Method M (Algo. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:methodM" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) -\end_layout - -\end_deeper -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "alg:MC-BS" - -\end_inset - -MC-BS sampling of -\begin_inset Formula $p_{g}$ -\end_inset - - values. 
-\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Float algorithm -wide false -sideways false -status open - -\begin_layout Plain Layout -Input: two correlated error sets -\begin_inset Formula $E_{1}$ -\end_inset - -, -\begin_inset Formula $E_{2}$ -\end_inset - - of size -\begin_inset Formula $N$ -\end_inset - -, and a statistic estimator -\begin_inset Formula $S$ -\end_inset - - (MUE or -\begin_inset Formula $Q_{95}$ -\end_inset - -) -\end_layout - -\begin_layout Enumerate -For both error sets -\end_layout - -\begin_deeper -\begin_layout Enumerate -Estimate the MSE -\begin_inset Formula $m_{i}$ -\end_inset - -, -\begin_inset Formula $S$ -\end_inset - - value -\begin_inset Formula $s_{i}$ -\end_inset - - and their uncertainties -\begin_inset Formula $u(m_{i})$ -\end_inset - - and -\begin_inset Formula $u(s_{i})$ -\end_inset - - by bootstrap -\end_layout - -\begin_layout Enumerate -Optimize the parameters of a normal distribution -\begin_inset Formula $\mathcal{N}(\mu_{i},\sigma_{i})$ -\end_inset - - to minimize the weighted sum of squared residuals -\begin_inset Formula -\begin{equation} -\chi^{2}(\mu_{i},\sigma_{i})=\frac{\left(m_{i}-\mu_{i}\right)^{2}}{u^{2}(m_{i})}+\frac{\left(s_{i}-S_{f}(\mu_{i},\sigma_{i})\right)^{2}}{u^{2}(s_{i})} -\end{equation} - -\end_inset - -where -\begin_inset Formula $S_{f}(\mu,\sigma)$ -\end_inset - - is a function giving the adequate statistic (MUE or -\begin_inset Formula $Q_{95}$ -\end_inset - -) for a folded normal distribution -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - -. 
-\end_layout - -\end_deeper -\begin_layout Enumerate -Estimate the correlation coefficient -\begin_inset Formula $\rho$ -\end_inset - - between -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - -\end_layout - -\begin_layout Enumerate -Build a normal bivariate distribution with optimized parameters -\begin_inset Formula -\begin{equation} -(E_{1}^{*},E_{2}^{*})\sim\mathcal{N}\left(\boldsymbol{\mu}=(\mu_{1},\mu_{2}),\boldsymbol{\Sigma}=\left(\begin{array}{cc} -\sigma_{1}^{2} & \rho\sigma_{1}\sigma_{2}\\ -\rho\sigma_{1}\sigma_{2} & \sigma_{2}^{2} -\end{array}\right)\right)\label{eq:bivnorm-1} -\end{equation} - -\end_inset - - -\end_layout - -\begin_layout Enumerate -Perform a MC-BS analysis to get a set of -\begin_inset Formula $p_{g}$ -\end_inset - - values from this distribution (Algo. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:MC-BS" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "alg:pg-dist" - -\end_inset - -Sampling -\begin_inset Formula $p_{g}$ -\end_inset - - values from approximating normal distributions. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Most of the pertinent methods tested in the present article come from the - bio-medical statistics (see, -\emph on -e.g., -\emph default - Ref. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2018" -literal "false" - -\end_inset - -), where small samples are very common. - -\end_layout - -\begin_layout Plain Layout -Unless the reference dataset is exhaustive, error statistics are affected - by a finite size uncertainty. 
- When comparing uncertain statistics it is important to note that they might - not be independent -\begin_inset CommandInset citation -LatexCommand cite -key "Nicholls2016" -literal "false" - -\end_inset - -, which might strongly affect the comparison result. - For instance, the correlation of the means of two samples is equal to the - correlation of the samples, -\begin_inset Formula $\mathrm{cor}(\overline{x},\overline{y})=\mathrm{cor}(X,Y)$ -\end_inset - -. - When two samples are strongly positively correlated, which is often the - case in benchmark error datasets as shown below, small differences in their - means tend to be more significant than for independent samples. - This might have a non-negligible impact on the ranking of methods. -\end_layout - -\end_inset - - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Comparison of means -\end_layout - -\begin_layout Plain Layout - -\color orange -Two-Sample t-Test for Equal Means (REF???) cf. - -\begin_inset Flex URL -status open - -\begin_layout Plain Layout - -https://www.itl.nist.gov/div898/handbook/eda/section4/eda43.htm##Snedecor -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset Newpage clearpage -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset FloatList figure - -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset Newpage clearpage -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset FloatList table - -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset Newpage clearpage -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset FloatList algorithm - -\end_inset - - -\end_layout - -\end_body -\end_document diff --git a/article/article_v1.0.lyx b/article/article_v1.0.lyx deleted file mode 100644 index cd2e307..0000000 --- a/article/article_v1.0.lyx +++ /dev/null @@ -1,25891 +0,0 @@ -#LyX 2.3 created this file. 
For more info see http://www.lyx.org/ -\lyxformat 544 -\begin_document -\begin_header -\save_transient_properties true -\origin unavailable -\textclass revtex4-1 -\begin_preamble -\usepackage{datetime} -\usepackage{refstyle} -\usepackage{longtable} -\usepackage{url} -\usepackage[title,toc,page,header]{appendix} -%%\usepackage[nosort,super]{cite} - -\pdfminorversion=5 -\pdfcompresslevel=9 -\pdfobjcompresslevel=5 -\end_preamble -\options aip, preprint -\use_default_options false -\begin_modules -fixltx2e -fix-cm -\end_modules -\maintain_unincluded_children false -\language english -\language_package none -\inputencoding utf8 -\fontencoding T1 -\font_roman "lmodern" "default" -\font_sans "default" "default" -\font_typewriter "default" "default" -\font_math "auto" "auto" -\font_default_family default -\use_non_tex_fonts false -\font_sc false -\font_osf false -\font_sf_scale 100 100 -\font_tt_scale 100 100 -\use_microtype true -\use_dash_ligatures true -\graphics default -\default_output_format default -\output_sync 0 -\bibtex_command default -\index_command default -\float_placement !t -\paperfontsize 11 -\spacing single -\use_hyperref true -\pdf_title "Comparison of methods" -\pdf_bookmarks false -\pdf_bookmarksnumbered false -\pdf_bookmarksopen false -\pdf_bookmarksopenlevel 1 -\pdf_breaklinks true -\pdf_pdfborder true -\pdf_colorlinks true -\pdf_backref false -\pdf_pdfusetitle false -\pdf_quoted_options "citecolor =blue, linkcolor = blue, urlcolor = blue" -\papersize default -\use_geometry true -\use_package amsmath 2 -\use_package amssymb 2 -\use_package cancel 0 -\use_package esint 1 -\use_package mathdots 0 -\use_package mathtools 0 -\use_package mhchem 2 -\use_package stackrel 0 -\use_package stmaryrd 0 -\use_package undertilde 0 -\cite_engine natbib -\cite_engine_type numerical -\biblio_style plainnat -\use_bibtopic false -\use_indices false -\paperorientation portrait -\suppress_date false -\justification true -\use_refstyle 0 -\use_minted 0 -\index Index 
-\shortcut idx -\color #008000 -\end_index -\leftmargin 2cm -\topmargin 2cm -\rightmargin 2cm -\bottommargin 2cm -\secnumdepth 3 -\tocdepth 3 -\paragraph_separation indent -\paragraph_indentation default -\is_math_indent 0 -\math_numbering_side default -\quotes_style english -\dynamic_quotes 0 -\papercolumns 1 -\papersides 1 -\paperpagestyle default -\tracking_changes false -\output_changes false -\html_math_output 0 -\html_css_as_file 0 -\html_be_strict false -\end_header - -\begin_body - -\begin_layout Title -Probabilistic performance estimators for computational chemistry methods: - Systematic Improvement Probability and Ranking Probability Matrix -\begin_inset Note Note -status open - -\begin_layout Plain Layout - -\series bold -\color blue -On the comparison of error sets and -\begin_inset Newline newline -\end_inset - -their statistics in benchmarking studies -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Author -Pascal PERNOT -\end_layout - -\begin_layout Affiliation -Institut de Chimie Physique, UMR8000, -\begin_inset Newline newline -\end_inset - -CNRS, Université Paris-Saclay, -\begin_inset Newline newline -\end_inset - -91405 Orsay, France -\end_layout - -\begin_layout Author Email - -Pascal.Pernot@universite-paris-saclay.fr -\begin_inset Newline newline -\end_inset - - -\end_layout - -\begin_layout Author -Andreas SAVIN -\end_layout - -\begin_layout Affiliation -Laboratoire de Chimie Théorique, -\begin_inset Newline newline -\end_inset - -CNRS and UPMC Université Paris 06, -\begin_inset Newline newline -\end_inset - -Sorbonne Universités, 75252 Paris, France -\end_layout - -\begin_layout Author Email - -Andreas.Savin@lct.jussieu.fr -\end_layout - -\begin_layout Abstract -The comparison of benchmark error sets is an essential tool for the evaluation - of theories in computational chemistry. 
- The standard ranking of methods by their Mean Absolute Error is unsatisfactory - for several reasons linked to the non-normality of the error distributions - and the presence of underlying trends. - Complementary statistics have recently been proposed to palliate such deficienc -ies, such as quantiles of the absolute errors distribution or the mean predictio -n uncertainty. - We introduce here a new score, the systematic improvement probability (SIP), - based on the direct pair-wise comparison of absolute errors, bypassing - the need of other descriptive statistics. - Independently of the chosen scoring rule, the uncertainty of the statistics - due to the incompleteness of the benchmark data sets is also generally - overlooked. - However, this uncertainty is essential to appreciate the robustness of - score-based rankings. - In the present article, we develop two indicators based on robust statistics - to address this problem: -\begin_inset Formula $P_{inv}$ -\end_inset - -, the inversion probability between two values of a statistic, and -\begin_inset Formula $\mathbf{P}_{r}$ -\end_inset - -, the ranking probability matrix. - We demonstrate also the essential contribution of the correlations between - error sets in these scores comparisons. - These methods are applied to eight data sets extracted from the recent - benchmarking literature. -\end_layout - -\begin_layout Section -Introduction -\end_layout - -\begin_layout Standard -Benchmarks are a central tool for the evaluation of new theories/methods - in quantum chemistry -\begin_inset CommandInset citation -LatexCommand cite -key "Mata2017" -literal "false" - -\end_inset - -. - Amongst many possible metrics -\begin_inset CommandInset citation -LatexCommand citep -key "Civalleri2012" -literal "false" - -\end_inset - -, the most common benchmarking statistics are the mean unsigned error (MUE/MAD/M -AE...), mean signed error (MSE), root mean squared error (RMSE) and root mean - squared deviation (RMSD). 
- The explicit definition of these scores is given in a previous article - -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - -. - In a vast majority of benchmark studies, the MUE, or some variant of it, - is used to compare methods performance. -\begin_inset Note Note -status open - -\begin_layout Plain Layout -If the ranking of methods is precious for developers who want to assess - the impact of their latest methods, it might be of less interest for final - users. - In particular, it does not generally offer the choice and criteria for - picking another method than the -\begin_inset Quotes eld -\end_inset - -best one -\begin_inset Quotes erd -\end_inset - -. -\end_layout - -\end_inset - - Recently -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - -, we proposed a more informative probabilistic score, the 95th percentile - of the absolute errors distribution ( -\begin_inset Formula $Q_{95}$ -\end_inset - -). -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -We argued that -\begin_inset Formula $Q_{95}$ -\end_inset - - is more informative than the MUE, because the latter provides probabilistic - information only if the errors distribution is zero-centered normal, a - rather unlikely occurrence. - In contrast, -\begin_inset Formula $Q_{95}$ -\end_inset - - gives us the error level that one has only 5 -\begin_inset space \thinspace{} -\end_inset - -% chance to exceed in a new calculation (provided that the reference dataset - is representative of the systems for which predictions are sought). - The end-users can easily check if this threshold meets their expectations. 
- We recently realized that the 90th percentile (noted -\begin_inset Formula $P_{90}$ -\end_inset - -) has been used by Thakkar and colleagues in the same spirit -\begin_inset CommandInset citation -LatexCommand cite -key "Thakkar2015,Wu2015b" -literal "false" - -\end_inset - -. - We think -\begin_inset Formula $Q_{95}$ -\end_inset - - is more appropriate because of its direct link to the enlarged uncertainty - -\begin_inset Formula $u_{95}$ -\end_inset - - recommended in the thermochemistry literature -\begin_inset CommandInset citation -LatexCommand cite -key "Ruscic2014,Pernot2018" -literal "false" - -\end_inset - -. -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -Whichever statistic is used, the question remains of the robustness of - such scores and rankings with respect to the choice of the reference dataset. - One easily conceives that the values of these statistics change unpredictably - when one adds or removes points in the dataset. - Benchmarks implicitly assume that the error sets are representative samples - of unknown distributions characterizing model errors for each method – - the more systems in the dataset, the better the approximation of the underlying - distributions. - The quest for large datasets incurs heavy computer charges to perform benchmark -s, and there is also a trend to reduce this burden by looking for small, - optimally representative, datasets -\begin_inset CommandInset citation -LatexCommand cite -key "Gould2018,Morgante2019" -literal "false" - -\end_inset - -. - Besides, there are several properties for which the reference data are - rather sparse, leading to rather small datasets. - Another trend, enhanced by the development of machine learning is to replace - experimental values by gold standard calculations, with limitations on - the size of accessible systems -\begin_inset CommandInset citation -LatexCommand cite -key "Ramakrishnan2015,Zaspel2019" -literal "false" - -\end_inset - -. 
- As the estimated values of the statistics and their uncertainties depend - on the size of the dataset, it is important to assess this size effect - and its impact on statistics comparison and ranking. - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout - -\series bold -\color red -Comment on (non-)randomness of error sets (Andreas, any idea ?). -\end_layout - -\begin_layout Itemize - -\color red -reference data not random, but typically selected as representative of a - larger population -\end_layout - -\begin_layout Itemize - -\color red -error sets can be biased, should ideally be corrected, then unpredictability - remains -\end_layout - -\begin_layout Itemize - -\color red -in the limit of an exact method, error sets dominated by random exptl errors -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -This question has been considered recently by Proppe and Reiher -\begin_inset CommandInset citation -LatexCommand cite -key "Proppe2017" -literal "false" - -\end_inset - -, who used bootstrapping to assess the impact of dataset size and reference - data uncertainty on the first place in an intercomparison of M -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash - -\begin_inset Quotes erd -\end_inset - -o -\end_layout - -\end_inset - -ssbauer isomer shifts estimated by a dozen of DFAs. - They concluded that for their dataset of -\begin_inset Formula $N=39$ -\end_inset - - values, at least three methods were competing for the first place, with - a slight probabilistic advantage for PBE0. - This is a very interesting contribution to the quality assessment of benchmarki -ng tools. - We recently considered another approach to this problem by defining an - inversion probability -\begin_inset Formula $P_{inv}$ -\end_inset - - for the ranking of two methods -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - -. 
- Our definition, which was based on the assumption of a normal distribution - of statistics differences and neglected error sets correlations, deserves - a more general setup. - -\end_layout - -\begin_layout Standard -In the present study, we revisit the ranking uncertainty problem along several - complementary lines: -\end_layout - -\begin_layout Enumerate -we consider the statistical significance of the difference between two values - of a statistic: it depends both on the uncertainty on the estimated values, - which is notably influenced by the dataset size, and on the correlation - between these values, which is due in a large part to the use of a common - reference dataset -\begin_inset CommandInset citation -LatexCommand cite -key "Nicholls2016" -literal "false" - -\end_inset - -. - A few specific points have also to be considered: the non-normality of - the error sets distributions, the small size of some datasets, the uncertainty - on reference data, and some properties of quantiles estimators. - -\end_layout - -\begin_layout Enumerate -we define a ranking probability matrix -\begin_inset Formula $\mathrm{P}_{r}$ -\end_inset - -, generalizing the proposition of Proppe and Reiher -\begin_inset CommandInset citation -LatexCommand cite -key "Proppe2017" -literal "false" - -\end_inset - -, which enables us to propose an efficient visual assessment of the robustness - of rankings. -\end_layout - -\begin_layout Enumerate -we introduce a new statistic (the systematic improvement probability, SIP) - that conveys the proportion of systems in the benchmark data set for which - one method has smaller absolute errors than the other, and the expected - gain or loss when switching between methods. 
-\end_layout - -\begin_layout Standard -\begin_inset VSpace defskip -\end_inset - - -\end_layout - -\begin_layout Standard -In the next section, we consider the uncertainty sources impacting the values - of benchmarking statistics (scores) and we present the tools best adapted - to estimate the uncertainty on statistics and to compare them. - These methods are then validated on several datasets taken from the recent - benchmarking literature and covering a wide range of dataset sizes and - properties. - The discussion considers the impact of these observations on the benchmarking - practice and proposes several suggestions on their reporting, as well as - on the best practice to share benchmark data. -\end_layout - -\begin_layout Section -Statistical methods -\end_layout - -\begin_layout Subsection -Error sets, their uncertainty and correlation -\begin_inset CommandInset label -LatexCommand label -name "subsec:Error-sets,-their" - -\end_inset - - -\end_layout - -\begin_layout Standard -Benchmarking of a method -\begin_inset Formula $M$ -\end_inset - - is based on the statistical analysis of its error set ( -\begin_inset Formula $E_{M}=\left\{ e_{i}(M)\right\} _{i=1}^{N}$ -\end_inset - -), based on a set of -\begin_inset Formula $N$ -\end_inset - - calculated ( -\begin_inset Formula $C_{M}=\left\{ c_{i}(M)\right\} _{i=1}^{N}$ -\end_inset - -) and reference data ( -\begin_inset Formula $R=\left\{ r_{i}\right\} _{i=1}^{N}$ -\end_inset - -), where -\begin_inset Formula -\begin{equation} -e_{i}(M)=r_{i}-c_{i}(M)\label{eq:errors-def} -\end{equation} - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Uncertainty. -\end_layout - -\begin_layout Standard -As the reference data or even the calculated values can be uncertain, one - should consider that the error sets contain uncertain values when estimating - and comparing statistics. 
- Experimental or computational uncertainties being typically estimated by - standard deviations, one can use the method of combination of variances - to get the uncertainty on the errors -\begin_inset CommandInset citation -LatexCommand cite -key "GUM" -literal "false" - -\end_inset - -, -\begin_inset Formula -\begin{equation} -u(e_{i})=\sqrt{u(r_{i})^{2}+u(c_{i})^{2}}\label{eq:ue-def} -\end{equation} - -\end_inset - -where -\begin_inset Formula $u(x)$ -\end_inset - - is the uncertainty on -\begin_inset Formula $x$ -\end_inset - -. - This formula assumes that the individual errors on the reference data and - calculated values are uncorrelated. - For an experimental reference value -\begin_inset Formula $r_{i}$ -\end_inset - -, -\begin_inset Formula $u(r_{i})$ -\end_inset - - would typically be a measurement uncertainty. - For a computed reference value -\begin_inset Formula $r_{i}$ -\end_inset - - and for a calculated value -\begin_inset Formula $c_{i}$ -\end_inset - -, uncertainty might come from numerical uncertainty due to the use of finite - precision arithmetics and discretization errors -\begin_inset CommandInset citation -LatexCommand cite -key "Janes2011,Cances2017" -literal "false" - -\end_inset - -, statistical uncertainty ( -\emph on -e.g. -\emph default -, for Monte Carlo methods -\begin_inset CommandInset citation -LatexCommand cite -key "Reynolds_1982,Cailliez2011" -literal "false" - -\end_inset - -), or parametric uncertainty ( -\emph on -e.g. -\emph default -, for calibrated methods -\begin_inset CommandInset citation -LatexCommand citep -key "Mortensen2005,Cailliez2011,Pernot2017b,Bakowies2019,Bakowies2020" -literal "false" - -\end_inset - -). -\end_layout - -\begin_layout Standard -We consider here deterministic computational chemistry methods with assumed - low and controlled arithmetic uncertainty. - The uncertainty on errors is then equal to the reference data uncertainty - -\begin_inset Formula $u(e_{i})\equiv u(r_{i})$ -\end_inset - -. 
- For the sake of generality, the -\begin_inset Formula $u(e_{i})$ -\end_inset - - notation is preserved in the following. -\end_layout - -\begin_layout Paragraph -Correlation. -\end_layout - -\begin_layout Standard -Let us consider a set of -\begin_inset Formula $K$ -\end_inset - - methods -\begin_inset Formula $\left\{ M_{i}\right\} _{i=1}^{K}$ -\end_inset - -. - The covariance -\begin_inset CommandInset citation -LatexCommand citep -key "Snedecor1989" -literal "false" - -\end_inset - - of the error sets for two methods can be decomposed as -\begin_inset Formula -\begin{align} -\mathrm{cov}(E_{i},E_{j}) & =\mathrm{cov}(R-C_{i},R-C_{j})\\ - & =\mathrm{var}(R)+\mathrm{cov}(C_{i},C_{j})-\mathrm{cov}(R,C_{i})-\mathrm{cov}(R,C_{j}) -\end{align} - -\end_inset - -where, for brevity, we use shortened notations such as -\begin_inset Formula $E_{i}\equiv E_{M_{i}}$ -\end_inset - -. - It is not possible to predict the sign and amplitude of -\begin_inset Formula $\mathrm{cov}(E_{i},E_{j})$ -\end_inset - - from this decomposition, but a few considerations might be helpful: -\end_layout - -\begin_layout Itemize -when comparing computational chemistry methods, it is very likely that their - prediction sets are strongly positively correlated (covariant). - It is also very likely that the predictions of good methods have a strong - positive covariance with the reference data, if the latter are not dominated - by measurement errors. - Besides, one can expect that the variance of the reference data is of the - same order (possibly larger if there are notable experimental errors) as - the variance/covariances of the predictions. - So, in a typical comparison scenario, -\begin_inset Formula $\mathrm{cov}(E_{i},E_{j})$ -\end_inset - - results from the compensation of terms with similar magnitudes, and one - should not expect a null covariance of error sets. 
-\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout - -\color red -even if the predictions of two methods are statistically independent ( -\begin_inset Formula $\mathrm{cov}(C_{1},C_{2})=0$ -\end_inset - -), it is likely (and desirable) that they have positive correlations with - the reference data. - -\begin_inset Formula -\begin{align*} -cov(R,C_{i}) & \le\sqrt{var(R)var(C_{i})}\\ -cov(R,C_{j}) & \le\sqrt{var(R)var(C_{j})}\\ -cov(E_{i},E_{j}) & \ge var(R)-\sqrt{var(R)}(\sqrt{(var(C_{i})}+\sqrt{(var(C_{i})}) -\end{align*} - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Itemize -if reference data uncertainties are larger than prediction errors, the covarianc -e should be dominated by -\begin_inset Formula $\mathrm{var}(R)$ -\end_inset - -, and all error sets should be strongly positively correlated. -\end_layout - -\begin_layout Standard -In the following case studies (Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Examples" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), we report and analyze the correlation coefficients between error sets - (normalized covariances) -\begin_inset Formula -\begin{align} -\mathrm{cor}(E_{i},E_{j}) & =\frac{\mathrm{cov}(E_{i},E_{j})}{\sigma_{E_{i}}\sigma_{E_{j}}} -\end{align} - -\end_inset - -where -\begin_inset Formula $\sigma_{E_{i}}$ -\end_inset - -is the standard deviation of the error set -\begin_inset Formula $E_{i}$ -\end_inset - -, assumed finite. - We will show through case studies that the correlation matrix contains - relevant information on the quality of datasets and the proximity of methods. -\end_layout - -\begin_layout Paragraph -Representation. 
-\end_layout - -\begin_layout Standard -Correlation matrices can be represented by combining a color scheme and - an ellipse model -\begin_inset CommandInset citation -LatexCommand cite -key "Murdoch1996" -literal "false" - -\end_inset - - (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:cmat-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), such that a blue right-slanted ellipse stands for a positive correlation, - a red left-slanted ellipse for a negative one, and a white (invisible) - disk for a null correlation. - The larger the absolute value of the correlation, the darker the color - and the thinner the ellipse. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/WU2015_CorrMat_Data_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 200bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/WU2015_CorrMat_Errors_Spearman0.png - lyxscale 20 - width 32text% - BoundingBox 200bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:cmat-example" - -\end_inset - -Rank correlation matrices between (a) data sets and (b) errors sets for - case WU2015 (Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Zas2019" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -For the example showcased in Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:cmat-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a), one sees that all the datasets -\begin_inset Formula $C_{i}$ -\end_inset - - are all strongly positively correlated. - By contrast, the error sets -\begin_inset Formula $E_{i}$ -\end_inset - - present a more relaxed pattern (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:cmat-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b)), with weaker positive correlations, and even a very small negative - correlation for MP2 with all the other error sets. - Having noticed this, one can remark that MP2 data present also smaller - correlation coefficients with other datasets, although this is barely visible - on the figure (the difference bears on the third digit of the correlation - coefficients). - In the following, we present correlation matrices for error sets only. -\end_layout - -\begin_layout Subsection -Statistics, their uncertainty and correlation -\begin_inset CommandInset label -LatexCommand label -name "subsec:Statistics,-their-uncertainty" - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Uncertainty. -\end_layout - -\begin_layout Standard -The value -\begin_inset Formula $s$ -\end_inset - - of a statistic -\begin_inset Formula $S$ -\end_inset - - (MSE, MUE, -\begin_inset Formula $Q_{95}$ -\end_inset - -...) estimated on an error set is generally uncertain, with uncertainty estimated - by its standard error -\begin_inset Formula $u(s)$ -\end_inset - -. 
- Two main uncertainty sources should be considered: (1) the limited size - -\begin_inset Formula $N$ -\end_inset - - of the reference data sample, and (2) the errors uncertainties, -\begin_inset Formula $u(e_{i})$ -\end_inset - - (Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Error-sets,-their" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - Unless the dataset is exhaustive ( -\emph on -e.g. -\emph default -, a dataset containing a property for a complete class of systems), the - first source is always present. - For experimental reference data, the second source is also always present, - but experimental uncertainties are rarely available for large datasets, - and a common practice seems to be to ignore them in the statistical analysis - (although they are often discussed to assess the quality of the dataset). - Some studies considered the effect of representative uncertainty levels - on benchmarking conclusions -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,DeWaele2016,Proppe2017" -literal "false" - -\end_inset - -. -\end_layout - -\begin_layout Standard -In Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Estimation-of-the" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, the impact of both uncertainty sources is illustrated on the mean value - (MSE), for which analytical formulae are available. - The strategy to handle reference data uncertainty depends on their distribution. - If the reference data uncertainties are uniform over the dataset, the hypothesi -s of -\emph on -i.i.d. 
- -\emph default - errors holds, and standard statistical procedures can be applied (unless - one is interested in quantifying specifically model errors -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,Proppe2017" -literal "false" - -\end_inset - -). - Otherwise, weighted statistics have to be used -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,Proppe2017" -literal "false" - -\end_inset - -, which will not be considered here. - Instead, we assume that datasets should not include data with extreme uncertain -ty values. -\end_layout - -\begin_layout Standard -Simple formulae for standard errors, such as those for the mean (a linear - statistic), are not available for non-linear statistics such as the MUE - or -\begin_inset Formula $Q_{95}$ -\end_inset - -. - Moreover, in order to avoid some of the limitations implied by such formulae - ( -\emph on -e.g. -\emph default -, normality hypothesis), one can use a general method to estimate the standard - error of any statistic: the bootstrap -\begin_inset CommandInset citation -LatexCommand cite -key "Efron1979,Efron1991,Hesterberg2015" -literal "false" - -\end_inset - -. - It is a Monte Carlo sampling method which consists in random draws with - replacement of -\begin_inset Formula $N'$ -\end_inset - - values from a dataset of size -\begin_inset Formula $N$ -\end_inset - -. - In the standard bootstrap, one uses -\begin_inset Formula $N'=N$ -\end_inset - -, -\emph on -i.e. -\emph default -, the generated samples have the same size as the original set. - The bootstrap has been shown to provide reliable estimations of uncertainties, - but the mean values unavoidably reflect the bias due to the original data - set -\begin_inset CommandInset citation -LatexCommand cite -key "Hesterberg2015" -literal "false" - -\end_inset - -. - In consequence, we estimate in the following the mean values from the original - sample and the uncertainties from the bootstrap samples. 
- The main limitation of the bootstrap is its hypothesis of -\begin_inset Formula $i.i.d.$ -\end_inset - - data, but it is consistent with our choice to avoid weighted statistics - and to avoid reference datasets with a large uncertainty range. - -\end_layout - -\begin_layout Paragraph -Correlation. -\end_layout - -\begin_layout Standard -The statistics covariance -\begin_inset Formula $\mathrm{cov}(s_{1},s_{2})$ -\end_inset - - derives from the mathematical expression of -\begin_inset Formula $S$ -\end_inset - - and from the variances and covariance of the error sets, -\begin_inset Formula $\mathrm{cov}(E_{1},E_{2})$ -\end_inset - -. - To estimate -\begin_inset Formula $\mathrm{cov}(s_{1},s_{2})$ -\end_inset - - in the case of a linear statistic, one can directly apply the generalization - of the combination of variances to several model outputs -\begin_inset CommandInset citation -LatexCommand cite -key "GUM-Supp2" -literal "false" - -\end_inset - -. - For the MSE, it is easy to demonstrate that the covariance is transferred - in totality: -\begin_inset Formula $\mathrm{cov}(\overline{e}_{1},\overline{e}_{2})=\mathrm{cov}(E_{1},E_{2})$ -\end_inset - -. - More generally, for linear statistics, -\begin_inset Formula $\mathrm{cov}(E_{1},E_{2})=0\Longrightarrow\mathrm{cov}(s_{1},s_{2})=0$ -\end_inset - -. - For non-linear statistics, such as the MUE or -\begin_inset Formula $Q_{95}$ -\end_inset - -, the combination of covariances is unsuitable, and Monte Carlo strategies - are used. - -\end_layout - -\begin_layout Standard -To illustrate the transfer of correlation from error sets to non-linear - statistics, we performed a Monte Carlo study, detailed in Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Covariance-of-scores" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, with scenarii implying diverse distribution shapes. 
- A few global trends can be derived from this study, notably that for the - MUE and -\begin_inset Formula $Q_{95},$ -\end_inset - - -\begin_inset Formula $\mathrm{cor}(s_{1},s_{2})$ -\end_inset - - is a convex, positive function of -\begin_inset Formula $\mathrm{cor}(E_{1},E_{2})$ -\end_inset - -. - Moreover, for a given value of -\begin_inset Formula $\mathrm{cor}(E_{1},E_{2})$ -\end_inset - - one observes that -\begin_inset Formula $\mathrm{cor}(MUE_{1},MUE_{2})\ge\mathrm{cor}(Q_{95,1},Q_{95,2})$ -\end_inset - -. - As we explored only a fraction of the possible scenarii for the errors - distributions, these trends should not be considered as general. - Our main point is that the correlation of error sets is at least partially - transferred to the derived statistics, a fact to be considered when comparing - the values of these statistics. -\end_layout - -\begin_layout Subsection -Pair-wise comparison of errors -\end_layout - -\begin_layout Standard -The systematic improvement probability (SIP) between two methods -\begin_inset Formula $M_{i}$ -\end_inset - - and -\begin_inset Formula $M_{j}$ -\end_inset - - is defined as the proportion of systems in the reference set for which - the absolute error decreases when using -\begin_inset Formula $M_{i}$ -\end_inset - - instead of -\begin_inset Formula $M_{j}$ -\end_inset - -. - It is estimated as -\begin_inset Formula -\begin{align} -\mathrm{SIP}_{i,j} & =\frac{D_{i,j}}{N}\\ -D_{i,j} & =\sum_{k=1}^{N}\mathbf{1}_{\Delta_{k}(M_{i},M_{j})<0} -\end{align} - -\end_inset - -where -\begin_inset Formula $\mathbf{1}_{X}$ -\end_inset - - is the indicator function, taking for value 1 if -\begin_inset Formula $X$ -\end_inset - - is true and 0 otherwise, and -\begin_inset Formula -\begin{equation} -\Delta_{k}(M_{i},M_{j})=|e_{k}(M_{i})|-|e_{k}(M_{j})| -\end{equation} - -\end_inset - -Note that, because of the possible presence of ties, one has -\begin_inset Formula $\mathrm{SIP}_{i,j}+\mathrm{SIP}_{j,i}\apprle1$ -\end_inset - -. 
-\end_layout - -\begin_layout Paragraph -Interpretation. -\end_layout - -\begin_layout Standard -A line of the SIP matrix provides the SIP values for the corresponding - method over all the other ones. - If a new method -\begin_inset Formula $M_{1}$ -\end_inset - - provides systematic improvement over -\begin_inset Formula $M_{2}$ -\end_inset - -, in the sense that it has smaller absolute errors for all systems in the - reference set, one should have -\begin_inset Formula $\mathrm{SIP}_{1,2}=1$ -\end_inset - -. - Values smaller than 0.5 indicate a degradation. - Note however that -\begin_inset Formula $M_{1}$ -\end_inset - - can achieve small values of the SIP and still have better scores (MUE, - -\begin_inset Formula $Q_{95}$ -\end_inset - -), as a few large improvements might overwhelm many small degradations. - The interest of the SIP indicator is mainly to alert the user that using - a -\begin_inset Quotes eld -\end_inset - -better method -\begin_inset Quotes erd -\end_inset - - -\begin_inset Formula $M_{1}$ -\end_inset - - can lead to a degradation of results with respect to -\begin_inset Formula $M_{2}$ -\end_inset - -, with a probability close to -\begin_inset Formula $(1-\mathrm{SIP}_{1,2})$ -\end_inset - -. - -\end_layout - -\begin_layout Paragraph -Mean SIP. -\end_layout - -\begin_layout Standard -In order to compare and rank a set of -\begin_inset Formula $K$ -\end_inset - - methods, one defines the Mean SIP (MSIP) as the mean value of a line of - the SIP matrix (excluding the diagonal) -\begin_inset Formula -\begin{equation} -\mathrm{MSIP}(M_{i})=\frac{1}{K-1}\sum_{j=1}^{K}\mathrm{SIP}_{i,j}\,(1-\delta_{ij})\label{eq:MSIP} -\end{equation} - -\end_inset - -The largest MSIP value points to a method which in average provides the - best level of improvement over the other methods in the set. - Note that the MSIP is not transferable for comparisons with new methods - out of its definition set. - -\end_layout - -\begin_layout Paragraph -Representation. 
-\end_layout - -\begin_layout Standard -In the same spirit as for correlation matrices, we represent SIP matrices - by a combination of color levels and disks. - Here, the color scale goes from blue (0.0) to red (1.0) with a white midpoint - (0.5), and the area of the disks is proportional to the SIP value. - The diagonal is null. - The matrix should be read by row: a row with a majority of red patches - signals a method with good SIP performances. - A contrario, a majority of blue patches on a row indicates a method with - poor SIP performances. - The methods are ordered by decreasing value of MSIP. - -\end_layout - -\begin_layout Standard -Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:SIPMAT-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - provides an example extracted from Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Pernot2018" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - It shows clearly that BH&HLYP is problematic, with a row of small blue - disks, for this dataset and is systematically and strongly outperformed - by all other methods. - At the opposite, the line for CAM-B3LYP is the only one to contain exclusively - values above 0.5 (reddish disks), albeit CAM-B3LYP does not achieve the - best MUE or -\begin_inset Formula $Q_{95}$ -\end_inset - - scores within this set of methods ( -\emph on -cf. - -\emph default - Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:pernot" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). 
- -\begin_inset Float figure -placement t -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/PER2018_SIPHeatmap.png - lyxscale 25 - width 45text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:SIPMAT-example" - -\end_inset - -SIP matrix for a set of 9 methods compared on the G99 set of enthalpies - (case PER2018, Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Pernot2018" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - A line with a majority of red patches signals a method with good SIP performanc -es. - The SIP value is color-coded and the area of a disk is proportional to - the corresponding value. - The methods are ordered by decreasing value of MSIP (Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:MSIP" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Mean gain and loss. 
-\end_layout - -\begin_layout Standard -In order to appreciate the amplitude of the possible losses or gains when - switching between two methods, we define the mean gain (MG) as the mean - of the negative values of -\begin_inset Formula $\Delta_{k}(M_{i},M_{j})$ -\end_inset - -, which is only defined if -\begin_inset Formula $\mathrm{SIP}_{i,j}>0$ -\end_inset - -: -\begin_inset Formula -\begin{align} -\mathrm{MG}_{i,j} & =\frac{1}{D_{i,j}}\sum_{k=1}^{N}\mathbf{1}_{\Delta_{k}(M_{i},M_{j})<0}\,\Delta_{k}(M_{i},M_{j})\\ -\mathrm{ML}_{i,j} & =-\mathrm{MG}_{j,i} -\end{align} - -\end_inset - -where the mean loss (ML) is the opposite of the mean gain for the reciprocal - comparison. - These statistics are intended to convey an amplitude of the improvement - of -\begin_inset Formula $M_{i}$ -\end_inset - - over -\begin_inset Formula $M_{j}$ -\end_inset - -: MG is therefore a negative value (corresponding to a decrease of absolute - errors), and ML a positive value. - Moreover, the SIP, MG and ML provide a decomposition of the MUE difference - between two methods: -\begin_inset Formula -\begin{align} -\Delta_{\mathrm{MUE}_{i,j}} & =\mathrm{MUE}(M_{i})-\mathrm{MUE}(M_{j})\\ - & =\mathrm{SIP}_{i,j}*\mathrm{MG}_{i,j}+\mathrm{SIP}_{j,i}*\mathrm{ML}_{i,j} -\end{align} - -\end_inset - -This shows that, except for method pairs with an extreme SIP value, any - MUE difference is the balance between losses and gains distributed over - the systems. - One should not expect that a method with a smaller MUE will systematically - provide better results. - -\end_layout - -\begin_layout Paragraph -ECDF of -\begin_inset Formula $\Delta_{k}(M_{i},M_{j})$ -\end_inset - -. -\end_layout - -\begin_layout Standard -The scores (SIP, MG and ML) can be visualized on a single graph of the Empirical - Cumulated Density Function (ECDF) of the differences of absolute errors - between two methods, as shown in Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:Delta-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b). - This example is extracted from Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Borlido2019" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, on the prediction of band gaps. - It compares mBJ (MUE = 0.50 -\begin_inset space \thinspace{} -\end_inset - -eV) and LDA (MUE = 1.17 -\begin_inset space \thinspace{} -\end_inset - -eV), showing that the large MUE difference ( -\begin_inset Formula $\Delta_{\mathrm{MUE}}$ -\end_inset - -) between these methods is the balance of a mean gain -\begin_inset Formula $\mathrm{MG}=-0.86$ -\end_inset - - -\begin_inset space \thinspace{} -\end_inset - -eV for 85 -\begin_inset space \thinspace{} -\end_inset - -% of the systems (SIP), and a mean loss -\begin_inset Formula $\mathrm{ML}=0.37$ -\end_inset - - -\begin_inset space \thinspace{} -\end_inset - -eV for 15 -\begin_inset space \thinspace{} -\end_inset - -% of the systems. - In the hypothesis of a representative dataset, a user switching from LDA - to mBJ has to accept a 15 -\begin_inset space \thinspace{} -\end_inset - -% risk to see his LDA results degraded in average by 0.37 -\begin_inset space \thinspace{} -\end_inset - -eV, up to 1 -\begin_inset space \thinspace{} -\end_inset - -eV. 
- -\begin_inset Float figure -placement t -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/BOR2019_compareECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/BOR2019_deltaECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:Delta-example" - -\end_inset - -Statistics of absolute errors on band gaps for methods mBJ and LDA (case - BOR2019, Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Borlido2019" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) and their pair-wise differences: (a) ECDF of two error sets to be compared. - The MUE values are depicted by vertical dotted lines, and the -\begin_inset Formula $Q_{95}$ -\end_inset - - -\family roman -\series medium -\shape up -\size normal -\emph off -\bar no -\strikeout off -\xout off -\uuline off -\uwave off -\noun off -\color none -values -\family default -\series default -\shape default -\size default -\emph default -\bar default -\strikeout default -\xout default -\uuline default -\uwave default -\noun default -\color inherit - by vertical dashed lines; (b) ECDF of the difference of absolute errors - (blue curve). - The green- and red-shaded bands represent 95 -\begin_inset space \thinspace{} -\end_inset - -% confidence intervals for the reported statistics (SIP: systematic improvement - probability; MG: mean gain; ML: mean loss, -\begin_inset Formula $\Delta_{MUE}$ -\end_inset - -: MUE difference). - The orange bar represents an estimated level of uncertainty in the dataset. - It is a visual aid to evaluate the pertinence of the observed differences. 
-\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -Note that this information is not accessible when considering the ECDFs - of the absolute errors (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:Delta-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)). - For the chosen example, the comparison of these ECDFs might leave the false - impression that mBJ has consistently smaller absolute errors than LDA, - which is an artifact due to ignoring data pairing (correlation) - in this representation. - -\end_layout - -\begin_layout Subsection -Pair-wise comparison of statistics -\end_layout - -\begin_layout Subsubsection -The testing framework -\end_layout - -\begin_layout Standard -Using the error sets for two methods -\begin_inset Formula $M_{1}$ -\end_inset - - and -\begin_inset Formula $M_{2}$ -\end_inset - -, one calculates the values -\begin_inset Formula $s_{1}=S(E_{1})$ -\end_inset - - and -\begin_inset Formula $s_{2}=S(E_{2})$ -\end_inset - - of a statistic -\begin_inset Formula $S$ -\end_inset - -. - A common procedure to compare these two values is to test if their difference - is significantly larger than their combined uncertainty, -\emph on -i.e. -\emph default - -\begin_inset Formula -\begin{equation} -|s_{1}-s_{2}|>\kappa\thinspace u(s_{1}-s_{2})\label{eq:compare} -\end{equation} - -\end_inset - -where -\begin_inset Formula $u(s_{1}-s_{2})$ -\end_inset - - is the uncertainty on the difference, and -\begin_inset Formula $\kappa$ -\end_inset - - is an enlargement factor typically taken as -\begin_inset Formula $\kappa=2$ -\end_inset - - (or 1.96) in metrology -\begin_inset CommandInset citation -LatexCommand citep -key "Kacker2010" -literal "false" - -\end_inset - -. 
- In the hypothesis of a normal distribution for the statistics difference, - -\begin_inset Formula $\kappa=1.96$ -\end_inset - - corresponds to a confidence level of 95 -\begin_inset space \thinspace{} -\end_inset - -% for a two-sided test, implied by the absolute value in Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:compare" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - If one has evidence that the distribution of differences is not normal, - -\begin_inset Formula $\kappa$ -\end_inset - - has to be chosen as the uncertainty enlargement factor providing a 95 -\begin_inset space \thinspace{} -\end_inset - -% confidence interval for this distribution. - If the test is positive, there is less than 5 -\begin_inset space \thinspace{} -\end_inset - -% probability that the difference between -\begin_inset Formula $s_{1}$ -\end_inset - - and -\begin_inset Formula $s_{2}$ -\end_inset - - is due to sampling effects. -\end_layout - -\begin_layout Standard -Assuming that -\begin_inset Formula $u(s_{1}-s_{2})$ -\end_inset - - cannot be null nor infinite, it is convenient to recast the test by using - a discrepancy factor -\begin_inset Formula -\begin{equation} -\xi(s_{1},s_{2})=\frac{|s_{1}-s_{2}|}{u(s_{1}-s_{2})}\label{eq:discFac-1} -\end{equation} - -\end_inset - -to be compared to the threshold -\begin_inset Formula $\kappa$ -\end_inset - -. - A probability value ( -\begin_inset Formula $p$ -\end_inset - --value) corresponding to -\begin_inset Formula $\xi$ -\end_inset - - is derived from the cumulated density function of the expected distribution - for -\begin_inset Formula $\xi$ -\end_inset - -. 
- For instance -\begin_inset Formula -\begin{align} -p_{t} & =1-\Phi_{H}(\xi)\label{eq:pt-1}\\ - & =2*\left(1-\Phi(\xi)\right)\label{eq:pt} -\end{align} - -\end_inset - -where -\begin_inset Formula $\Phi_{H}(.)$ -\end_inset - - is the cumulative distribution function (CDF) of the standard half-normal - distribution -\begin_inset CommandInset citation -LatexCommand cite -key "Leone1961" -literal "false" - -\end_inset - -, and -\begin_inset Formula $\Phi(.)$ -\end_inset - - is the CDF of the standard normal distribution. - The half-normal distribution is used to account for the absolute value - in Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:discFac-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - The -\begin_inset Formula $t$ -\end_inset - - index of -\begin_inset Formula $p_{t}$ -\end_inset - - refers here to the analogy with the two-sample -\begin_inset Formula $t$ -\end_inset - --test for equal means -\begin_inset CommandInset citation -LatexCommand cite -key "Snedecor1989" -literal "false" - -\end_inset - -. - For testing, the probability threshold corresponding to -\begin_inset Formula $P(\xi>\kappa=1.96)$ -\end_inset - - is -\begin_inset Formula $0.05$ -\end_inset - -. - For -\begin_inset Formula $p_{t}$ -\end_inset - - above this value, one chooses not to reject the hypothesis that the observed - difference between two values is due to chance. - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -To overpass the normality hypothesis, one needs to characterize the CDF - of -\begin_inset Formula $\xi$ -\end_inset - -. 
- As for a given dataset one has single values for -\begin_inset Formula $s_{1}$ -\end_inset - - and -\begin_inset Formula $s_{2}$ -\end_inset - -, this requires to generate alternative datasets by some sampling strategy - (Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Bootstrap-based-comparison-of" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -In order to be able to estimate -\begin_inset Formula $p_{t}$ -\end_inset - -, one needs to evaluate the uncertainty on the difference of -\begin_inset Formula $s_{1}$ -\end_inset - - and -\begin_inset Formula $s_{2}$ -\end_inset - -. - Formally, it can be obtained by the combination of variances -\begin_inset CommandInset citation -LatexCommand cite -key "GUM" -literal "false" - -\end_inset - - -\begin_inset Formula -\begin{equation} -u(s_{1}-s_{2})=\sqrt{u^{2}(s_{1})+u^{2}(s_{2})-2\mathrm{cov}(s_{1},s_{2})}\label{eq:u-diff-stat} -\end{equation} - -\end_inset - -The usefulness of this formula depends on several assumptions (theoretical - limits of the statistics not within a high probability interval around - their values, symmetry of error intervals... - -\begin_inset CommandInset citation -LatexCommand cite -key "Nicholls2014,Nicholls2016" -literal "false" - -\end_inset - -). - Nevertheless, it shows that the covariance between statistics can have - a major effect on the amplitude of -\begin_inset Formula $u(s_{1}-s_{2})$ -\end_inset - -. - In the limit of very strong positive correlation, the uncertainty on the - difference can become very small, impacting -\begin_inset Formula $\xi(s_{1},s_{2})$ -\end_inset - - and -\begin_inset Formula $p_{t}$ -\end_inset - -. 
-\end_layout - -\begin_layout Standard -To estimate the effect of correlation on the comparison of scores, we introduce - a variant -\begin_inset Formula $p_{unc}$ -\end_inset - - (uncorrelated) of -\begin_inset Formula $p_{t}$ -\end_inset - -, based on a version of the discrepancy ignoring correlation -\begin_inset Formula -\begin{align} -\xi_{unc}(s_{1},s_{2}) & =\frac{|s_{1}-s_{2}|}{\sqrt{u(s_{1})^{2}+u(s_{2})^{2}}}\label{eq:xiUnc}\\ -p_{unc} & =2*\left(1-\Phi(\xi_{unc})\right)\label{eq:Punc} -\end{align} - -\end_inset - -In the hypothesis of mostly positive correlations for the statistics of - interest (MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -; Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Covariance-of-scores" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), -\begin_inset Formula $p_{unc}$ -\end_inset - - is expected to overestimate -\begin_inset Formula $p_{t}$ -\end_inset - -. -\end_layout - -\begin_layout Subsubsection -Bootstrap-based comparison of statistics -\begin_inset CommandInset label -LatexCommand label -name "subsec:Bootstrap-based-comparison-of" - -\end_inset - - -\end_layout - -\begin_layout Standard -Several strategies can be considered to compare pairs of statistics -\begin_inset Formula $(s_{1},s_{2})$ -\end_inset - - through a -\begin_inset Formula $p$ -\end_inset - --value. -\end_layout - -\begin_layout Paragraph -Estimate -\begin_inset Formula $u(s_{1})$ -\end_inset - -, -\begin_inset Formula $u(s_{2})$ -\end_inset - - and -\begin_inset Formula $\mathrm{cov}(s_{1},s_{2})$ -\end_inset - -. - -\end_layout - -\begin_layout Standard -The uncertainty on the statistics of interest (except for the MSE and RMSD) - and their covariance are not, to our knowledge, available in analytical - form. 
- In consequence, one has to use a numerical procedure, such as the bootstrap - to estimate them -\begin_inset CommandInset citation -LatexCommand cite -key "Efron1979,Hesterberg2015" -literal "false" - -\end_inset - -. - The application of the bootstrap to individual terms of Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:u-diff-stat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - will result in an accumulation of statistical uncertainties. - Besides, the estimation of covariances is very sensitive to outliers. - This approach is clearly suboptimal and is not recommended. -\end_layout - -\begin_layout Paragraph -Estimate directly -\begin_inset Formula $u(s_{1}-s_{2})$ -\end_inset - -. - -\end_layout - -\begin_layout Standard -A better approach in the present context is to estimate directly (by bootstrap) - the uncertainty on the difference of scores. - This relieves some distributional hypotheses in Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:u-diff-stat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, and enables the explicit correlation of samples of -\begin_inset Formula $s_{1}$ -\end_inset - - and -\begin_inset Formula $s_{2}$ -\end_inset - - through paired-data sampling. - However, estimating a discrepancy factor leads us to use Eq. -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:pt" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - to estimate the -\begin_inset Formula $p$ -\end_inset - --value, with the associated normality hypothesis. 
- -\begin_inset Float algorithm -placement !t -wide false -sideways false -status open - -\begin_layout Plain Layout -Input: Two paired error sets -\begin_inset Formula $E_{1}$ -\end_inset - -, -\begin_inset Formula $E_{2}$ -\end_inset - - of size -\begin_inset Formula $N$ -\end_inset - -, a statistic estimator -\begin_inset Formula $S$ -\end_inset - -, and a number of bootstrap samples -\begin_inset Formula $B$ -\end_inset - - -\end_layout - -\begin_layout Enumerate -Bootstrap the statistics difference -\end_layout - -\begin_deeper -\begin_layout Enumerate -For -\begin_inset Formula $j=1:B$ -\end_inset - - -\end_layout - -\begin_deeper -\begin_layout Enumerate -Generate a -\begin_inset Formula $N$ -\end_inset - --sample of paired data with replacement -\begin_inset Formula $\longrightarrow\left(E_{1}^{*},E_{2}^{*}\right)$ -\end_inset - - -\end_layout - -\begin_layout Enumerate -Estimate -\begin_inset Formula $d_{j}=S(E_{1}^{*})-S(E_{2}^{*})$ -\end_inset - - -\end_layout - -\end_deeper -\end_deeper -\begin_layout Enumerate -Calculate a generalized -\begin_inset Formula $p$ -\end_inset - --value to test -\begin_inset Formula $S(E_{1})=S(E_{2})$ -\end_inset - - -\end_layout - -\begin_deeper -\begin_layout Plain Layout -\begin_inset Formula $p_{g}=2\min(p^{*},1-p^{*})$ -\end_inset - -, where -\begin_inset Newline newline -\end_inset - - -\begin_inset Formula $p^{*}=(A+0.5C)/B$ -\end_inset - - -\begin_inset Newline newline -\end_inset - - -\begin_inset Formula $A=\sum_{i=1}^{B}1_{d_{i}<0}$ -\end_inset - - -\begin_inset Newline newline -\end_inset - - -\begin_inset Formula $C=\sum_{i=1}^{B}1_{d_{i}=0}$ -\end_inset - - -\end_layout - -\end_deeper -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "alg:methodM" - -\end_inset - -Method M: testing the equality of a statistic -\begin_inset Formula $S$ -\end_inset - - for two paired samples by bootstrap and a generalized 
-\begin_inset Formula $p$ -\end_inset - --value ( -\begin_inset Formula $p_{g}$ -\end_inset - -) -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - -. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Generalized -\begin_inset Formula $p$ -\end_inset - --value. - -\end_layout - -\begin_layout Standard -The use of the generalized -\begin_inset Formula $p$ -\end_inset - --value ( -\begin_inset Formula $p_{g}$ -\end_inset - -), as proposed by Wilcox and Erceg-Hurn -\begin_inset CommandInset citation -LatexCommand cite -key "Liu1997,Wilcox2012" -literal "false" - -\end_inset - - (method M; -\emph on -cf. - -\emph default - Algorithm -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:methodM" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), conveniently avoids to estimate -\begin_inset Formula $u(s_{1}-s_{2})$ -\end_inset - -, and the incurring normality hypothesis of -\begin_inset Formula $p_{t}$ -\end_inset - -. - It is based on a simple counting of null and negative bootstrapped differences - of statistics with paired samples. - If -\begin_inset Formula $S(E_{1})=S(E_{2})$ -\end_inset - -, one expects that the bootstrap sample will generate positive and negative - values of their difference in equal amounts. - In this case, -\begin_inset Formula $p^{*}\simeq1-p^{*}\simeq0.5$ -\end_inset - - and -\begin_inset Formula $p_{g}$ -\end_inset - - is close to 1. - Note that the null values in the differences sample are shared equally - between the positive and negative values. - On the opposite, if there is a small proportion -\begin_inset Formula $p^{*}$ -\end_inset - - of negative values, the mean of the differences sample should be positive, - different from zero. 
- The smaller -\begin_inset Formula $p^{*}$ -\end_inset - - the farther the mean from zero, and the lower the probability of the null, - -\begin_inset Formula $S(E_{1})=S(E_{2})$ -\end_inset - - hypothesis. - The symmetric case occurs for large values of -\begin_inset Formula $p^{*}$ -\end_inset - - (small values of -\begin_inset Formula $1-p^{*}$ -\end_inset - -). - As the sign of the difference is irrelevant, a factor two is applied to - estimate -\begin_inset Formula $p_{g}$ -\end_inset - -. - The identity of this algorithm with the analytical -\begin_inset Formula $p$ -\end_inset - --value for the comparison of normal samples means is established in Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Estimation-of--values" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. -\end_layout - -\begin_layout Standard -The use of paired samples is essential to capture inter-statistics correlations. - Wilcox and Erceg-Hurn -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - - have shown that their method M provides a well controlled level of type - I errors (false positive) for the comparison of quantiles at the 0.05 level. - They estimated that dataset sizes of -\begin_inset Formula $N\ge30$ -\end_inset - - are necessary when comparing quantiles up to 0.9. - Using the same protocol, we estimated that for the comparison of -\begin_inset Formula $Q_{95}$ -\end_inset - - values at the same 0.05 level, -\begin_inset Formula $N\ge60$ -\end_inset - - is requested. - Details are presented in Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Type-I-error" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. 
- -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout - -\color orange -In Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Examples" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, we show through simulation how the third strategy improves over the second - one. - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsubsection -Rank inversion probability -\begin_inset Formula $P_{inv}$ -\end_inset - - -\begin_inset CommandInset label -LatexCommand label -name "subsec:Rank-inversion-probability" - -\end_inset - - -\end_layout - -\begin_layout Standard -In a previous article -\begin_inset CommandInset citation -LatexCommand citep -key "Pernot2018" -literal "false" - -\end_inset - -, we defined a ranking inversion probability -\begin_inset Formula -\begin{equation} -P_{inv}=P(S_{1}<S_{2}|s_{1}>s_{2})\label{eq:defPinv} -\end{equation} - -\end_inset - -based on the normal distribution. - Using Equations -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:xiUnc" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -- -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:Punc" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, -\begin_inset Formula $P_{inv}$ -\end_inset - - can be reformulated as -\begin_inset Formula -\begin{align} -P_{inv} & =\Phi(0;\mu=s_{1}-s_{2},\sigma=\sqrt{u^{2}(s_{1})+u^{2}(s_{2})})\\ - & =\Phi(0;\mu=\xi_{unc})\\ - & =\Phi(-\xi_{unc})\\ - & =1-\Phi(\xi_{unc})\\ - & =p_{unc}\thinspace/\thinspace2 -\end{align} - -\end_inset - -where the unspecified parameters of the normal cumulative distribution function - -\begin_inset Formula $\Phi(x;\mu,\sigma)$ -\end_inset - - are their standard values ( -\begin_inset Formula $\mu=0$ -\end_inset - -, -\begin_inset Formula $\sigma=1$ -\end_inset - -). 
- The link to -\begin_inset Formula $p_{unc}$ -\end_inset - - shows the limitations of our previous definition of -\begin_inset Formula $P_{inv}$ -\end_inset - -, -\emph on -i.e. -\emph default -, the normality hypothesis and the neglect of error sets correlations. - -\end_layout - -\begin_layout Standard -Using the same difference statistics used for -\begin_inset Formula $p_{g}$ -\end_inset - - (Algorithm -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:methodM" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), one can generalize Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:defPinv" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - by defining -\begin_inset Formula $P_{inv}$ -\end_inset - - as the probability to have differences in the bootstrap sample with a sign - opposite to the reference one ( -\begin_inset Formula $\mathrm{sign}(s_{1}-s_{2})$ -\end_inset - -) -\begin_inset Formula -\begin{align} -P_{inv} & =\frac{1}{B}\left(\sum_{i=1}^{B}1_{\mathrm{sign}(d_{i})\ne\mathrm{sign}(s_{1}-s_{2})}-\sum_{i=1}^{B}1_{d_{i}=0}\right)\label{eq:pinv-new} -\end{align} - -\end_inset - -where -\begin_inset Formula $B$ -\end_inset - - is the number of bootstrap samples and the null differences (with sign - 0) are compensated for. - Enforcing the condition -\begin_inset Formula $s_{1}>s_{2}$ -\end_inset - - in Eq. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:defPinv" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, one gets -\begin_inset Formula $\mathrm{sign}(s_{1}-s_{2})=1$ -\end_inset - -, and finally -\begin_inset Formula -\begin{align} -P_{inv} & =\frac{1}{B}\left(\sum_{i=1}^{B}1_{\mathrm{sign}(d_{i})\ne1}-\sum_{i=1}^{B}1_{d_{i}=0}\right)\\ - & =\frac{1}{B}\left(\sum_{i=1}^{B}1_{d_{i}\le0}-\sum_{i=1}^{B}1_{d_{i}=0}\right)\\ - & =\frac{1}{B}\sum_{i=1}^{B}1_{d_{i}<0}\\ - & \simeq p_{g}\thinspace/\thinspace2\label{eq:pinv-vs-pg} -\end{align} - -\end_inset - -where the relation to -\begin_inset Formula $p_{g}$ -\end_inset - - (Algorithm -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:methodM" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) assumes a negligible probability to have null statistics differences and - exploits the fact that -\begin_inset Formula $\sum_{i=1}^{B}1_{d_{i}<0}<\sum_{i=1}^{B}1_{d_{i}>0}$ -\end_inset - - if -\begin_inset Formula $s_{1}>s_{2}$ -\end_inset - -. 
- -\begin_inset Float algorithm -wide false -sideways false -status open - -\begin_layout Plain Layout -Input: -\begin_inset Formula $K$ -\end_inset - - paired error sets, -\begin_inset Formula $E_{1},\ldots,E_{K}$ -\end_inset - - of size -\begin_inset Formula $N$ -\end_inset - -, a statistic estimator -\begin_inset Formula $S$ -\end_inset - -, and a number of bootstrap samples -\begin_inset Formula $B$ -\end_inset - - -\end_layout - -\begin_layout Enumerate -Bootstrap the ranks -\end_layout - -\begin_deeper -\begin_layout Enumerate -For -\begin_inset Formula $j=1:B$ -\end_inset - - -\end_layout - -\begin_deeper -\begin_layout Enumerate -Generate a -\begin_inset Formula $N$ -\end_inset - --sample of paired data with replacement -\begin_inset Formula $\longrightarrow\left(E_{1}^{*},\ldots,E_{K}^{*}\right)$ -\end_inset - - -\end_layout - -\begin_layout Enumerate -Estimate the statistics vector -\begin_inset Formula $S^{*}=\left(S(E_{1}^{*}),\ldots,S(E_{K}^{*})\right)$ -\end_inset - - -\end_layout - -\begin_layout Enumerate -Estimate the ranks by increasing order of -\begin_inset Formula $S^{*}$ -\end_inset - -: -\begin_inset Formula $O_{j}^{*}=\mathrm{order}(S^{*})$ -\end_inset - -, -\begin_inset Newline newline -\end_inset - -where -\begin_inset Formula $O_{j}^{*}$ -\end_inset - - is a -\begin_inset Formula $K$ -\end_inset - --vector of integer values. - -\end_layout - -\end_deeper -\end_deeper -\begin_layout Enumerate -Estimate for each method its probability to have any rank -\end_layout - -\begin_deeper -\begin_layout Plain Layout -\begin_inset Formula -\[ -P_{r,jk}=\frac{1}{B}\sum_{i=1}^{B}1_{O_{ij}^{*}=k} -\] - -\end_inset - - -\end_layout - -\end_deeper -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "alg:bs-rank" - -\end_inset - -Estimating the rank probabilities for a set of methods. 
-\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsubsection -Ranking probability matrix -\begin_inset Formula $\mathbf{P}_{r}$ -\end_inset - - -\begin_inset CommandInset label -LatexCommand label -name "subsec:Ranking-probability-matrix" - -\end_inset - - -\end_layout - -\begin_layout Standard -A measure of the reliability of a statistic-based ranking can be estimated - by bootstrap -\begin_inset CommandInset citation -LatexCommand cite -key "Hall2009" -literal "false" - -\end_inset - -. - This approach has notably been used by Proppe and Reiher -\begin_inset CommandInset citation -LatexCommand cite -key "Proppe2017" -literal "false" - -\end_inset - - to study how the sample size affects the probability for a DFA to be ranked - at first place on the basis of its prediction uncertainty. - We apply it here to compute, for a set of -\begin_inset Formula $K$ -\end_inset - - methods scored by a statistic -\begin_inset Formula $S$ -\end_inset - -, a ranking probability matrix -\begin_inset Formula $\mathbf{P}_{r}$ -\end_inset - - giving, for each method, its probability to have any rank -\begin_inset Formula -\begin{equation} -P_{r,jk}=P(\mathrm{rank}(S_{j})=k);\thinspace j,k=1,\ldots,K -\end{equation} - -\end_inset - -The algorithm to generate this matrix is described in Algorithm -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:bs-rank" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - -\end_layout - -\begin_layout Paragraph -Representations. -\end_layout - -\begin_layout Standard -Two representations for this matrix are used by Hall and Miller -\begin_inset CommandInset citation -LatexCommand cite -key "Hall2009" -literal "false" - -\end_inset - -, either a combined color-levels -\begin_inset space \thinspace{} -\end_inset - -/ -\begin_inset space \thinspace{} -\end_inset - -symbol-size image (Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:bsRank" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)), or a summary by mode and probability intervals (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:bsRank" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b)). - In the following, we will use mostly the levels image representation which - we find easier to read and interpret. -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -A summary in results tables can also be considered, by reporting for each - method its mode in ranking probability and the corresponding probability, - which indicates the strength of this rank. - -\end_layout - -\end_inset - - -\begin_inset Float figure -placement t -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/PER2018_figRanks_mue_levels.png - lyxscale 25 - height 5cm - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/PER2018_figRanks_mue_ci.png - lyxscale 25 - height 5cm - BoundingBox 0bp 170bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:bsRank" - -\end_inset - -Graphical representations of a MUE ranking probability matrix -\begin_inset Formula $\mathbf{P}_{r}$ -\end_inset - - : (left) color levels image of the ranking probability matrix; (right) - summary of the ranking probability matrix by the modes (diamonds) and 90 -\begin_inset space \thinspace{} -\end_inset - -% probability intervals. 
- The data are taken from the case PER2018 (Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Pernot2018" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - Both representations indicate a possible ranking inversion between B97-1, - CAM-B3LYP and PBE0, -\emph on -i.e. -\emph default -, the reference ranking based on the MUE is not certain for this trio. - Similar problems occur within two other groups, notably BLYP and PW86PBE. - The ranks of PBE (8) and BH&HLYP (9) are well established. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Remarks. -\end_layout - -\begin_layout Itemize -As discussed by Hall and Miller -\begin_inset CommandInset citation -LatexCommand cite -key "Hall2009" -literal "false" - -\end_inset - -, the standard bootstrap used in the present article ( -\begin_inset Formula $N$ -\end_inset - --out -\begin_inset space \thinspace{} -\end_inset - -of- -\begin_inset Formula $N$ -\end_inset - - sampling) tends to underestimate the dispersion of the ranks. 
- Better estimates would be obtained by a -\begin_inset Formula $N'$ -\end_inset - --out -\begin_inset space \thinspace{} -\end_inset - -of- -\begin_inset Formula $N$ -\end_inset - - sampling ( -\begin_inset Formula $N' - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Dataset Code -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Property -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $N$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $K$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Source -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PER2018 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Intensive atomization energies -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -222 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -9 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018,Pernot2019" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -BOR2019 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Band gaps -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -471 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -15 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand citep -key "Borlido2019" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -NAR2019 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout 
Plain Layout -Enthalpies of formation -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -469 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand cite -key "Narayanan2019" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -CAL2019 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -London Dispersion Corrections -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -41 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3*10 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand cite -key "Caldeweyher2019" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -JEN2018 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Non-covalent interaction energies -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -66 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -6 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand cite -key "Jensen2018" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -DAS2019 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Dielectric Constants -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -23 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -6 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset 
citation -LatexCommand cite -key "Das2019" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -THA2015 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Polarizability -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -135 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -7 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand cite -key "Thakkar2015" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -WU2015 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Polarizability -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -145 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -34 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand cite -key "Wu2015b" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -ZAS2019 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Effective atomization energies -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -6211 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset CommandInset citation -LatexCommand cite -key "Zaspel2019" -literal "false" - -\end_inset - - -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:Case-studies" - -\end_inset - -Case studies: 
-\begin_inset Formula $N$ -\end_inset - - is the number of systems in the dataset and -\begin_inset Formula $K$ -\end_inset - - is the number of compared methods. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -PER2018 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Pernot2018" - -\end_inset - - -\end_layout - -\begin_layout Standard -We consider here the intensive atomization energies -\begin_inset CommandInset citation -LatexCommand cite -key "Perdew2016" -literal "false" - -\end_inset - - estimated with 9 DFAs on the G3/99 dataset -\begin_inset CommandInset citation -LatexCommand cite -key "Curtiss2000" -literal "false" - -\end_inset - -, and extracted from a recent article by Pernot and Savin -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018,Pernot2019" -literal "false" - -\end_inset - -. - This medium-sized dataset ( -\begin_inset Formula $N=222$ -\end_inset - -) presents several non-normal error distributions, and was used to illustrate - the interest for benchmarks of using -\begin_inset Formula $Q_{95}$ -\end_inset - - as a complement to the MUE, and to illustrate our former definition of - -\begin_inset Formula $P_{inv}$ -\end_inset - -. - Here we focus on the correlations and their impact on the comparison of - statistics. 
- -\begin_inset Float figure -placement !t -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/PER2018_CorrMat_Errors_Spearman.png - lyxscale 20 - width 32col% - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/PER2018_CorrMat_MUE_Spearman.png - lyxscale 20 - width 32col% - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/PER2018_CorrMat_Q95_Spearman.png - lyxscale 20 - width 32col% - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/PER2018_HistCorrs.png - lyxscale 20 - width 99col% - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:pernot1" - -\end_inset - -Case PER2018 - correlations: (top) rank correlation matrices between Errors - sets, MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -; (bottom) histogram of non-diagonal elements of the corresponding correlation - matrices. 
- The methods are ordered by a clustering algorithm using the complete linkage - method -\begin_inset CommandInset citation -LatexCommand cite -key "Defays1977" -literal "false" - -\end_inset - - implemented in the -\family typewriter -R -\family default - function -\family typewriter - hclust -\family default - -\begin_inset CommandInset citation -LatexCommand cite -key "CiteR" -literal "false" - -\end_inset - -. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Correlations. -\end_layout - -\begin_layout Standard -The correlation matrices between the error sets and their statistics are - represented in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:pernot1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, along with histograms of their non-diagonal elements. - The errors sets are all positively correlated, with a wide distribution - of correlation coefficients, except for pairs involving BH&HLYP which presents - negative correlations with four other methods. - When considering the scores, all correlations are positive or null. - Globally, the correlations are weaker for -\begin_inset Formula $Q_{95}$ -\end_inset - - than for the MUE, except for a few pairs. - The maximum of the histograms shifts from 0.6 for MUE to 0 for -\begin_inset Formula $Q_{95}$ -\end_inset - -, but large correlation values are nevertheless still observed for -\begin_inset Formula $Q_{95}$ -\end_inset - -. - These observations confirm the main trends from the numerical study of - correlation transfer in Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Covariance-of-scores" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. 
- -\begin_inset Float table -placement !t -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center - -\size small -\begin_inset Tabular - - - - - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $p_{unc}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $p_{g}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $p_{unc}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $p_{g}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -B3LYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.18(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4.5(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - 
-\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.57(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.53(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.05(10) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.48(5) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -B97-1 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.85(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -2.7(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.61(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - - - 
-\begin_inset Text - -\begin_layout Plain Layout -BH&HLYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4.8(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -11.7(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.06(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.95(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --4.3(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.8(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -BLYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.6(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset 
Text - -\begin_layout Plain Layout -5.3(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.43(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.77(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.2(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.6(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -CAM-B3LYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.90(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.64 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.57 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.29 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4.1(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.74(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.33(3) -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.3(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.59(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -LC- -\begin_inset Formula $\omega$ -\end_inset - -PBE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.09(10) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.03 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4.3(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.01 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.65(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.43(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.1(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.44(3) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.8(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -8.1(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.30(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.81(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --2.6(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.8(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.92(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.44 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.24 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.12 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.3(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.33 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.02 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.01 -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.66(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.50(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.74(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.61(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PW86PBE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.6(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -6.1(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.49(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.59(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.6(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.43(6) -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label 
-LatexCommand label -name "tab:pernot" - -\end_inset - -Case PER2018 - absolute error statistics: -\begin_inset Formula $p$ -\end_inset - --values, inversion probabilities and SIP statistics for comparison with - the DFA of smallest MUE (B97-1). - The best scores and the values for which -\begin_inset Formula $p_{g}>0.05$ -\end_inset - - are in boldface. - The SIP, MG and ML columns correspond to the B97-1 row of the corresponding - matrices. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Statistics. -\end_layout - -\begin_layout Standard -The statistics are reported in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:pernot" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - Note that, due to the use of a different quantile estimation algorithm, - the values of -\begin_inset Formula $Q_{95}$ -\end_inset - - have changed slightly from the values reported in the original article - -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - -. - -\end_layout - -\begin_layout Standard -There is a group of three methods (B97-1, CAM-B3LYP and PBE0) with small - MUE values. - Considering the -\begin_inset Formula $p_{g}$ -\end_inset - - values, one cannot reject the hypothesis that the observed differences - are due to the limited size of the datasets. - Note that the same conclusion would have been reached when ignoring correlation - ( -\begin_inset Formula $p_{unc}$ -\end_inset - -), as the neglect of correlation increases the -\begin_inset Formula $p$ -\end_inset - --values, but no other one reaches the 0.05 threshold. - However, the -\begin_inset Formula $p_{unc}$ -\end_inset - - value for LC- -\begin_inset Formula $\omega$ -\end_inset - -PBE reaches 0.03, not far from the threshold. 
- Consistently, the MUE inversion probability -\begin_inset Formula $P_{inv}$ -\end_inset - - computed in the reference article -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2019" -literal "false" - -\end_inset - -, included LC- -\begin_inset Formula $\omega$ -\end_inset - -PBE in the group of methods with a sizable risk of inversion. - As demonstrated in Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:pinv-vs-pg" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, the revised version of -\begin_inset Formula $P_{inv}$ -\end_inset - - accounting for correlations is now practically equal to -\begin_inset Formula $p_{g}/2$ -\end_inset - -, which rejects LC- -\begin_inset Formula $\omega$ -\end_inset - -PBE as a contender for the head group. - -\end_layout - -\begin_layout Standard -When picking B97-1 instead of CAM-B3LYP based on the MUE, there is a 29 -\begin_inset space \thinspace{} -\end_inset - -% chance to be wrong, -\emph on -i.e. -\emph default -, that the MUE of CAM-B3LYP is indeed smaller than B97-1 due to the dataset - size. - This risk falls to 12 -\begin_inset space \thinspace{} -\end_inset - -% for PBE0. -\end_layout - -\begin_layout Standard -The situation is different for -\begin_inset Formula $Q_{95}$ -\end_inset - -, where the neglect of correlation would lead to the conclusion that PBE0 - (3.3(5) -\begin_inset space \thinspace{} -\end_inset - -kcal/mol) is not significantly distinct from B97-1 (2.7(4) -\begin_inset space \thinspace{} -\end_inset - -kcal/mol; -\begin_inset Formula $p_{unc}=0.33$ -\end_inset - -) whereas the correct value is given by -\begin_inset Formula $p_{g}=0.02$ -\end_inset - -. - In this example, -\begin_inset Formula $Q_{95}$ -\end_inset - - can help us to rank the three best methods, for which the MUE is not discrimina -nt. - This is linked to the presence of different tails in the absolute errors - distributions (cf.
- Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:pernot1-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)). -\end_layout - -\begin_layout Standard -This example illustrates and confirms the relations between -\begin_inset Formula $p_{unc}$ -\end_inset - -, -\begin_inset Formula $p_{g}$ -\end_inset - - and -\begin_inset Formula $P_{inv}$ -\end_inset - - expressed in Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Rank-inversion-probability" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - In the following examples, only -\begin_inset Formula $P_{inv}$ -\end_inset - - is reported. -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -The SIP analysis brings another view on the head trio (B97-1, CAM-B3LYP - and PBE0), as the method with the highest MSIP is CAM-B3LYP. - One can see on the SIP matrix in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:SIPMAT-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, that indeed, the row for CAM-B3LYP is fully reddish, when those for B97-1 - and PBE0 present also blue and white patches. - We note also that B97-1 provides a nearly full improvement over BH&HLYP - (SIP = 0.95(2)). -\end_layout - -\begin_layout Standard -The ECDF of the difference of absolute errors for CAM-B3LYP and B97-1 helps - to understand the contradiction between the MUE and MSIP ranks (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:pernot1-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b)). 
- The MUE difference for this pair is statistically not significant ( -\begin_inset Formula $p_{g}=0.57$ -\end_inset - -), the SIP value for CAM-B3LYP over B97-1 is 0.67 (1-0.33), the mean gain - -0.6 kcal/mol and the mean loss 1.3 kcal/mol, due to the heavy tail in the - CAM-B3LYP error distribution (these numbers correspond to the reciprocal - comparison of the one presented in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:pernot" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - So by switching from B97-1 to CAM-B3LYP, one would have to accept a 33 -\begin_inset space \thinspace{} -\end_inset - -% risk to degrade the intensive atomization energies by 1.3 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol on average and up to 4 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol, but one would improve the estimations in 67 -\begin_inset space \thinspace{} -\end_inset - -% of the cases by 0.6 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol on average. - The same comparison between CAM-B3LYP and PBE0 (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:pernot1-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(c)) shows that there is no strong basis to favor either method.
- -\end_layout - -\begin_layout Standard -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/PER2018_compareECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/PER2018_deltaECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/PER2018_deltaECDF2.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:pernot1-1" - -\end_inset - -Case PER2018 - absolute errors statistics: (a) ECDF and statistics of absolute - errors; (b-c) ECDF and statistics of the difference of absolute errors. - See Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:Delta-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - for details. - The light orange band depicts the chemical accuracy (1 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Ranking. -\end_layout - -\begin_layout Standard -The ranking probability matrices (Figs -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:bsRank" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - and -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:pernot2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) confirm the above analysis. - The group of three methods (B97-1, CAM-B3LYP and PBE0) at the top of the - MUE ranking presents a blurred image (no clear diagonal), whereas the first - -\begin_inset Formula $Q_{95}$ -\end_inset - - rank of B97-1 is not ambiguous. 
- As expected, the MSIP ranking favors solidly CAM-B3LYP. - Globally, B97-1 should be preferred to minimize the risk of large errors, - where CAM-B3LYP would provide overall smaller absolute errors. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/PER2018_figRanks_q95hd_levels.png - lyxscale 25 - width 32col% - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/PER2018_figRanks_msip_levels.png - lyxscale 25 - width 32col% - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:pernot2" - -\end_inset - -Case PER2018: ranking probability matrix for (a) -\begin_inset Formula $Q_{95}$ -\end_inset - - and (b) MSIP. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -\noindent -BOR2019 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Borlido2019" - -\end_inset - - -\end_layout - -\begin_layout Standard -Band gap estimations for a set of 471 systems -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -The original dataset contains 472 systems, but several values are missing - for -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -ce{NaYbP2S6} -\end_layout - -\end_inset - -, which was excluded. -\end_layout - -\end_inset - - by 15 DFAs were extracted from the Supplementary Information of a recent - article by Borlido -\emph on -et al. - -\emph default - -\begin_inset CommandInset citation -LatexCommand citep -key "Borlido2019" -literal "false" - -\end_inset - -. - For a full description of the dataset, we refer the reader to the original - article. 
- -\end_layout - -\begin_layout Standard -The reference authors reported and analyzed relative errors, but as there - is a large range of band gaps in this set this causes a dispersion of relative - errors over six orders of magnitude, and an unsuitable distortion of the - error distributions, with large relative errors for small band gaps, and - small relative errors for large band gaps. - It is true that for some methods ( -\emph on -e.g. -\emph default -, LDA) the errors increase with the value of the band gap, but this is due - mostly to a systematic deviation (trend), not to an increase in the dispersion - of the errors. - In consequence, we chose to treat here the errors as defined in Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:errors-def" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - -\end_layout - -\begin_layout Standard -Borlido -\emph on -et al. - -\emph default - -\begin_inset CommandInset citation -LatexCommand citep -key "Borlido2019" -literal "false" - -\end_inset - - discuss the uncertainties on the reference band gaps in their dataset and - estimate them at a few tenths of an eV. - Without more detailed information, we assume that this represents a uniform - uncertainty for the dataset. -\end_layout - -\begin_layout Paragraph -Correlations. -\end_layout - -\begin_layout Standard -One sees in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:borlido1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - that across the spectrum of methods, all error sets correlation coefficients - are positive, and can reach very large values, up to 0.998. - Only about 30 -\begin_inset space \thinspace{} -\end_inset - -% of the dataset pairs have correlation coefficients below 0.6, involving - notably PBE0_mix and HSE_mix. - If the error sets are dominated by method errors ( -\emph on -i.e. 
-\emph default -, there are no large reference data errors, nor outliers), the correlation - matrix can be used to infer a clustering of methods, describing the relationshi -ps of the methods for the current property/dataset. - Error sets with large correlation coefficients are related by a linear - or monotonic transformation and the corresponding methods are clustered - together. - The presence of well delimited clusters indicates that the error sets are - not dominated by reference data errors. - From the correlation matrix, the clusters would be (HLE16, HLE16+SOC), - (BJ, SCAN, LDA, PBE, PBE_SOL, LDA+SOC, PBE+SOC), (HSE_mix, PBE0_mix) and - (HSE06,PBE0). - mBJ and HSE14 stay alone. - This clustering seems to produce blocks that correspond to physical intuition: - LDA, PBE, SCAN, ... - all have an electron-gas background. - This is relaxed for HLE16 that differs from HLE16+SOC only by taking into - account spin-orbit coupling. - These methods are further decoupled from hybrid methods (PBE0, HSE06). - -\begin_inset Float figure -placement !t -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/BOR2019_Cormat_Errors_Spearman.png - lyxscale 40 - width 50text% - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:borlido1" - -\end_inset - -Case BOR2019 - rank correlation between errors sets. - The methods are ordered by a clustering algorithm using the complete linkage - method -\begin_inset CommandInset citation -LatexCommand cite -key "Defays1977" -literal "false" - -\end_inset - - implemented in the -\family typewriter -R -\family default - function -\family typewriter - hclust -\family default - -\begin_inset CommandInset citation -LatexCommand cite -key "CiteR" -literal "false" - -\end_inset - -. 
- -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Statistics. -\end_layout - -\begin_layout Standard -The values are reported in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:borlido" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - Although mBJ presents the smallest MUE (0.50(2) -\begin_inset space \thinspace{} -\end_inset - -eV), the value for HSE06 is very close (0.53(5) -\begin_inset space \thinspace{} -\end_inset - -eV), and one cannot exclude that the difference is due to a mere sampling - effect ( -\begin_inset Formula $p_{g}\simeq2P_{inv}=0.16$ -\end_inset - -). - Besides, HSE06 is the only method with a notably non-zero -\begin_inset Formula $P_{inv}$ -\end_inset - - value with mBJ for the MUE. - mBJ is also the method with the smallest -\begin_inset Formula $Q_{95}$ -\end_inset - -, and no other method is able to challenge this rank. - mBJ has the largest MSIP, but its value is moderate (0.7), indicating that - mBJ does not provide a full systematic improvement over (some of) the other - methods. - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -When considering -\begin_inset Formula $Q_{95}$ -\end_inset - -, ignoring the correlation would lead us to conclude that HSE06 is still - a contender to mBJ ( -\begin_inset Formula $p_{unc}=0.1)$ -\end_inset - -, but -\begin_inset Formula $p_{g}$ -\end_inset - - tells us the opposite, -\emph on -i.e. - -\emph default - that mBJ has a significantly better -\begin_inset Formula $Q_{95}$ -\end_inset - - (1.41(7) -\begin_inset space \thinspace{} -\end_inset - -eV) than HSE06 (1.7(2) -\begin_inset space \thinspace{} -\end_inset - -eV). 
-\end_layout - -\end_inset - - -\begin_inset Float table -placement !t -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -eV -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -eV -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout 
- -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -eV -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -eV -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -LDA -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.17(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.2(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.25(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.84(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.87(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.41(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -LDA + SOC -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.24(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.3(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - 
-\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.16(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.86(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.92(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.38(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.05(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.0(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.41(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.82(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.76(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.40(3) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE + SOC -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.12(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.0(2) -\end_layout - -\end_inset - 
- -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.30(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.83(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.82(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.37(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE_SOL -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.12(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.1(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.30(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.83(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.82(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.42(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -HLE16 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.60(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - 
- -\begin_inset Text - -\begin_layout Plain Layout -1.9(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.66(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.49(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.44(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.23(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -HLE16 + SOC -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.61(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.0(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.65(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.49(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.48(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.25(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -BJ -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.79(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.3(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.55(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.75(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.49(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.31(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -mBJ -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.50(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -1.41(7) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.69(2) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -SCAN -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset 
Text - -\begin_layout Plain Layout -0.81(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.4(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.55(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.74(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.53(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.30(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -HSE06 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.53(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.09 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.7(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.68(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.52(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.28(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.25(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -HSE14 -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.57(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.8(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.63(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.56(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.38(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.33(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -HSE06_mix -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.64(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.0(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.60(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.58(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.51(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.36(3) -\end_layout - 
-\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.78(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.8(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.44(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.72(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.57(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.46(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE0_mix -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.82(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.4(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.47(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.66(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.67(4) -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.37(3) -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:borlido" - -\end_inset - -Case BOR2019 - absolute error statistics: inversion probabilities and SIP - statistics for comparison with the DFA of smallest MUE (mBJ). - The best scores and the values for which -\begin_inset Formula $(p_{g}=2P_{inv})>0.05$ -\end_inset - - are in boldface. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -The SIP values for mBJ lie between 0.49 and 0.86. - The latter value is against LDA+SOC, which means that for 14 -\begin_inset space \thinspace{} -\end_inset - -% of the systems, LDA+SOC achieves smaller absolute errors than mBJ, despite - its poor scores. - Interestingly, small values, close to 0.5, are also observed against HLE16, - HLE16+SOC and HSE06, indicating a notable risk of performance loss when - switching from one of these methods to mBJ. -\end_layout - -\begin_layout Standard -As seen previously, when going from LDA to mBJ (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:Delta-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), one has less than 15 -\begin_inset space \thinspace{} -\end_inset - -% chance to perform better using LDA, and the mean gain more than doubles - the mean loss. - By contrast, the comparison of mBJ to HSE06 (Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:borlido2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) is an example of undecidability: the MUE difference is not significantly - different from zero, and one has as much to lose as to gain by switching - between both methods. -\end_layout - -\begin_layout Standard -The SIP matrix (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:borlidoSIP" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) provides a convenient summary of these observations. - The mBJ line is mostly reddish with white spots indicating neutral comparisons. - In contrast, the LDA+SOC line is fully blueish, indicating that it is dominated - by all other methods. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/BOR2019_compareECDF2.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/BOR2019_deltaECDF2.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:borlido2" - -\end_inset - -Case BOR2019 - absolute errors statistics: (a) ECDF of the absolute errors; - (b) ECDF of the difference of absolute errors for mBJ and HSE06. - See Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:Delta-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - for details. - -\color teal -The orange band depicts a reasonable level of uncertainty in the dataset - (0.2 -\begin_inset space \thinspace{} -\end_inset - -eV). 
-\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/BOR2019_SIPHeatmap.png - lyxscale 35 - width 45text% - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:borlidoSIP" - -\end_inset - -Case BOR2019 - SIP matrix. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Ranking. -\end_layout - -\begin_layout Standard -Ranking probability matrices for the MUE, -\begin_inset Formula $Q_{95}$ -\end_inset - - and MSIP are presented in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:borlidoRPmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a-c). - They illustrate the previous results and show that ranking by MUE beyond - the second place becomes uncertain. - This is even more notable for -\begin_inset Formula $Q_{95}$ -\end_inset - -. - The MSIP ranking selects the same group of five methods as the MUE ranking, - with some inversions. - At the other end of the ranking, an end-group of five methods is rather well ascertained. - -\end_layout - -\begin_layout Standard -These matrices are a convenient tool to visualize the impact of dataset - size on the ranking quality. - We estimated them for reduced error sets ( -\begin_inset Formula $N=235$ -\end_inset - - and -\begin_inset Formula $N=100$ -\end_inset - -), sampled randomly from the original one. - The impact is clearly visible in Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:borlidoRPmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(d-i), as the diagonal contributions get weaker when -\begin_inset Formula $N$ -\end_inset - - decreases. - For the MUE, the block of ranks 1 and 2 is quite robust, but the situation - deteriorates for the upper ranks. - For -\begin_inset Formula $Q_{95}$ -\end_inset - -, the first place of mBJ is very stable, but the upper ranks become very - uncertain, up to the last ranks for -\begin_inset Formula $N=100$ -\end_inset - -. - As for the MUE, the MSIP ranking suffers from the reduced datasets, but - a head group of five methods is well preserved. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/BOR2019_figRanks_mue_levels.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/BOR2019_figRanks_q95hd_levels.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/BOR2019_figRanks_msip_levels.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/BOR2019_figRanks_mue_levels_sel.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain 
Layout -\begin_inset Graphics - filename ../results/figs/BOR2019_figRanks_q95hd_levels_sel.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/BOR2019_figRanks_msip_levels_sel.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/BOR2019_figRanks_mue_levels_sel100.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/BOR2019_figRanks_q95hd_levels_sel100.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/BOR2019_figRanks_msip_levels_sel100.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:borlidoRPmat" - -\end_inset - -Case BOR2019: ranking probability matrices for the full dataset, -\begin_inset Formula $N=471$ -\end_inset - - (a-c), and for random reduced sets -\begin_inset Formula $N=235$ -\end_inset - - (d-f) and -\begin_inset Formula $N=100$ -\end_inset - - (g-i). 
- -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -NAR2019 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Narayanan2019" - -\end_inset - - -\end_layout - -\begin_layout Standard -The dataset contains the calculated enthalpies of formation by G4MP2 for - 469 molecules having experimental values with small uncertainty (Pedley - test set) -\begin_inset CommandInset citation -LatexCommand cite -key "Narayanan2019" -literal "false" - -\end_inset - -. - The G4MP2 values are compared with those of B3LYP, M06-2X and -\begin_inset Formula $\omega$ -\end_inset - -B97X-D. - -\end_layout - -\begin_layout Paragraph -Correlations. -\end_layout - -\begin_layout Standard -The most remarkable feature of the correlation matrices in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:narayanan1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - is the decorrelation of G4MP2 errors from the other error sets. - For the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -, weak positive correlations appear, more notably for -\begin_inset Formula $Q_{95}$ -\end_inset - -. 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/NAR2019_CorrMat_Errors_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 200bp 300bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/NAR2019_CorrMat_MUE_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 200bp 300bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/NAR2019_CorrMat_Q95_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 200bp 300bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:narayanan1" - -\end_inset - -Case NAR2019 - rank correlation matrices: (a) Errors; (b) MUE; (c) -\begin_inset Formula $Q_{95}$ -\end_inset - -. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Statistics. -\end_layout - -\begin_layout Standard -The statistics reported in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:narayanan" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - show the supremacy of G4MP2 over the three DFAs for all statistics. - Narayanan -\emph on -et al. 
- -\emph default - -\begin_inset CommandInset citation -LatexCommand cite -key "Narayanan2019" -literal "false" - -\end_inset - - claim an -\begin_inset Quotes eld -\end_inset - -accuracy -\begin_inset Quotes erd -\end_inset - - -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -The MUE is sometimes abusively used to characterize the -\emph on -accuracy -\emph default - of a method, which cannot be the case when error distributions are not - zero-centered normal -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,Pernot2018" -literal "false" - -\end_inset - -. -\end_layout - -\end_inset - - (MUE) of 0.79 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol with G4MP2. - However, a look at the absolute errors CDFs (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:narayanan2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)) shows that for G4MP2, there is still a probability of about 20 -\begin_inset space \thinspace{} -\end_inset - -% that the absolute errors exceed 1 kcal/mol, and 5 -\begin_inset space \thinspace{} -\end_inset - -% to exceed 2.2 kcal/mol. 
- -\begin_inset Float table -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center - -\size small -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -G4MP2 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.79(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -2.21(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.81(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -B3LYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4.0(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -9.3(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.0 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.22(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.89(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --3.7(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.52(7) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -M06-2X -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.71(10) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -6.1(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.37(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.83(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --2.5(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.82(7) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $\omega$ -\end_inset - -B97X-D -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.85(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset 
Text - -\begin_layout Plain Layout -5.2(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.59(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.73(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.7(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.62(5) -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:narayanan" - -\end_inset - -Case NAR2019 - absolute error statistics: inversion probabilities and SIP - statistics for comparison with the method of smallest MUE (G4MP2). - The best scores are in boldface. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -G4MP2 presents a high degree of systematic improvement over the three DFAs - (MSIP = 0.81). - Nonetheless, there is about 27 -\begin_inset space \thinspace{} -\end_inset - -% probability (1-0.73) that -\begin_inset Formula $\omega$ -\end_inset - -B97X-D performs better, but with a rather small value of ML (0.62 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol), when compared to the chemical accuracy (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:narayanan2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(d)).
- In contrast, the mean gain when using G4MP2 instead of -\begin_inset Formula $\omega$ -\end_inset - -B97X-D is about -1.7 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol for 73 -\begin_inset space \thinspace{} -\end_inset - -% of the systems. - The advantage of G4MP2 over B3LYP is more spectacular (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:narayanan2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(c)). - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/NAR2019_compareECDF.png - lyxscale 20 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/NAR2019_SIPHeatmap.png - lyxscale 20 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/NAR2019_deltaECDF.png - lyxscale 20 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/NAR2019_deltaECDF2.png - lyxscale 20 - width 32text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:narayanan2" - -\end_inset - -Case NAR2019: (a) ECDF of the absolute errors; (b) SIP matrix; (c,d) ECDF - of the difference of absolute errors of B3LYP and -\begin_inset Formula $\omega$ -\end_inset - -B97X-D with respect to G4MP2 (see Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:Delta-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - for details). 
-\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -CAL2019 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Caldeweyher2019" - -\end_inset - - -\end_layout - -\begin_layout Standard -The impact of an atomic-charge dependent London dispersion correction (D4 - model) has been evaluated by Caldeweyher -\emph on -et al. - -\emph default - -\begin_inset CommandInset citation -LatexCommand cite -key "Caldeweyher2019" -literal "false" - -\end_inset - - on a large series of datasets. - From those, we selected one of the largest ones, -\emph on -i.e. -\emph default -, the reference energies for the MOR41 transition metal reaction benchmark - set -\begin_inset CommandInset citation -LatexCommand cite -key "Dohm2018" -literal "false" - -\end_inset - -, available as Tables -\begin_inset space \thinspace{} -\end_inset - -14-18 in the Supplementary Information of the reference article. -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -Reproducibility note: these data are inconsistent with the results reported - in Fig. -\begin_inset space \thinspace{} -\end_inset - -9 of the reference article and the subsequent discussion. - We contacted the corresponding author (S. - Grimme) who kindly sent us a corrected version of the Supplementary Information. - -\end_layout - -\end_inset - - The reference data are calculated values, with a priori no significant - numerical uncertainty. - The London dispersion corrections have been tested on a series of 10 DFAs. - Note that the nomenclature used here for the corrections is the one provided - in the SI table, which differs somewhat from the one used in the reference - article. - -\end_layout - -\begin_layout Paragraph -Statistics.
-\end_layout - -\begin_layout Standard -The results are reported in Tables -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:caldeweyher" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -- -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:caldeweyher-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - , where DFT-D3 has been taken as reference throughout for -\begin_inset Formula $P_{inv}$ -\end_inset - - estimation. - The aim here is to check if DFT-D4 brings significant differences. - It is notable that with a set of size 41, the sampling uncertainty is rather - large for both statistics (typically on the second or first digit). - Nevertheless, significant MUE improvements are observed when passing from - DFT-D3 to DFT-D4, except for revPBE and PW6B95. - In the latter case, the better MUE of the D3 calculations, noted by the - reference authors, might be due to a random effect of dataset selection. - Based on -\begin_inset Formula $Q_{95}$ -\end_inset - - the improvements due to D4 are not significant, except for DOD-PBE, DSD-PBE - and RPBE. - Globally, DFT-D4 improves the MUE, but does not reduce the risk of large - errors.
- -\begin_inset Float table -placement t -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\size footnotesize -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -kcal/mol -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -DOD-PBE-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -2.1(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -7(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.63(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -DOD-PBE-D4-MBD 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -2.1(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -8(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.65(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.44(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --0.28(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.24(5) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -DOD-PBE-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -3.5(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -10(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\size footnotesize -0.13(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.83(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --1.8(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.8(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -DSD-PBE-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -2.9(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -11(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.35(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -DSD-PBE-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -2.9(5) -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -11(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.35(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -DSD-PBE-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -3.7(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.29(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.71(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout 
Plain Layout - -\size footnotesize --1.5(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.7(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -B3LYP-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -4.2(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -11(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.07 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.56(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.41(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --0.22(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.21(3) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -B3LYP-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -4.2(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\series bold -\size footnotesize -11(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.11 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.56(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -B3LYP-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -4.8(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -13(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.26(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.71(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --1.1(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.8(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\size footnotesize -PBE0-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -2.3(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.01 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -8(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.08 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.30(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PBE0-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -2.3(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.01 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -8(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.08 -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.30(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PBE0-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -2.6(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -8(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.29(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.61(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --0.7(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.4(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PW6B95-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series 
bold -\size footnotesize -3.2(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.02 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -7.9(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.30 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.35(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.56(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --1.6(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -1.0(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PW6B95-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.0(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.08 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -7.8(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.31 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.48(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain 
Layout - -\size footnotesize -0.54(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --1.3(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -1.0(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PW6B95-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -2.7(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -7.4(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.55(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -CAM-B3LYP-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.7(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -9(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.04 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.38(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -CAM-B3LYP-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.7(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -9(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.04 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.38(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain 
Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -CAM-B3LYP-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -4.3(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -10(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.20(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.76(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --0.8(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.5(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -revPBE-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.3(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.11 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain 
Layout - -\size footnotesize -0.33 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.43(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.54(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --0.27(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.28(6) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -revPBE-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.3(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.08 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.39 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.54(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -revPBE-D3 -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.8(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.46(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.54(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --2.0(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -1.3(3) -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:caldeweyher" - -\end_inset - -Case CAL2019 - absolute error statistics: inversion probabilities are calculated - for comparison with DFT-D3, for each DFT. - The SIP statistics are calculated for comparison with the smallest MUE - within each DFT. - The best scores and the values for which -\begin_inset Formula $p_{g}>0.05$ -\end_inset - - are in boldface. 
-\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Float table -placement t -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -kcal/mol -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -M06L-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -5.1(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -13(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.08 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.35(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain 
Layout - -\size footnotesize -M06L-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -5.1(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -13(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.08 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.35(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -M06L-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -5.5(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -14(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.22(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.71(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --0.7(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.5(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PBE-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.5(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.34 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.45(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.51(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --0.20(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.16(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PBE-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold 
-\size footnotesize -3.4(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.48 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.60(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -PBE-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -3.9(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.30(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.68(7) -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --1.0(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.5(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -RPBE-D4-ATM -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.4(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.48(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -RPBE-D4-MBD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -3.4(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -\size footnotesize -12(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.48(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -RPBE-D3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -8.3(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -20(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.05(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -0.95(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize --5.3(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\size footnotesize -2(1) -\end_layout - -\end_inset - - - - 
-\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:caldeweyher-1" - -\end_inset - -Case CAL2019 - Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:caldeweyher" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, continued. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -Let us consider several examples with the SIP approach: -\end_layout - -\begin_layout Itemize - -\series bold -PBE0-Dn -\series default -. - Inspection of Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:caldeweyher1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a) shows that the 95 -\begin_inset space \thinspace{} -\end_inset - -% confidence interval (CI) for the SIP value of 0.61 for PBE0-D4-ATM over - PBE0-D3 does not exclude the neutral value (0.5), with a tiny advantage - of the mean gain over the mean loss. - One can note also that, despite their large error bars, the small MUE differenc -e -\begin_inset Formula $\Delta_{MUE}$ -\end_inset - - between these two methods is significantly different from 0 (its 95 -\begin_inset space \thinspace{} -\end_inset - -% confidence interval excludes 0), an effect of the correlation between - error sets. - -\end_layout - -\begin_layout Itemize - -\series bold -PW6B95-Dn -\series default -. - This case is an inversion of the previous one, where the confidence interval - on the SIP value of nearly 0.4 (disadvantaging D4) does not exclude the - neutral value, and the CI on the MUE difference -\begin_inset Formula $\Delta_{MUE}$ -\end_inset - - does not exclude 0. 
- One cannot firmly conclude that the D3 version performs better than the - D4 ones for this DFA. -\end_layout - -\begin_layout Itemize - -\series bold -RPBE-Dn -\series default -. - For this case, one has a rare instance where D4 improves almost systematically - over D3, with a SIP of 0.95(3), and a mean gain overwhelming the mean loss. -\end_layout - -\begin_layout Standard -Except for RPBE-Dn, where the SIP value of D4 over D3 is about 0.95, and - DOD-PBE ( -\begin_inset Formula $\mathrm{SIP}=0.83$ -\end_inset - -), all the estimated SIP values lie near or below 0.75, down to 0.45, meaning - that there is no systematic improvement when passing from D3 to D4. - In several cases, the uncertainty due to the limited set size does not - allow one to conclude clearly. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/CAL2019_PBE0_deltaECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/CAL2019_PW6B95_deltaECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/CAL2019_RPBE_deltaECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:caldeweyher1" - -\end_inset - -Case CAL2019 - selected SIP plots. - The orange band depicts the chemical accuracy (1 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Ranking. -\end_layout - -\begin_layout Standard -Considering that both DFT-D4 options are mostly indiscernible, we built - global ranking probability matrices for the DFT-D3 and DFT-D4-ATM data. - The results are reported in Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:caldewheyerRPmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(top). - Although the rankings of the Dn options for each DFA are mostly unambiguous, - a global ranking is clearly very uncertain. - Based on the MUE, DOD-PBE-D4-ATM and PBE0-D4-ATM would share the leading - places. - Beyond that, the situation is utterly scrambled, the only clear point being - the last ranks for M06-L-D3 and RPBE-D3. - The picture based on -\begin_inset Formula $Q_{95}$ -\end_inset - - is even less well defined, with no clear leading method within a group - of five. - The MSIP ranking is akin to the MUE ranking. -\end_layout - -\begin_layout Standard -If one restricts the methods to DFT-D4-ATM (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:caldewheyerRPmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - bottom), the situation is slightly better defined for the leading and trailing - places for the three scores, but remains largely undecided for the intermediate - ranks. - This illustrates how, for a given dataset, the uncertainty in ranking is - also affected by the number of methods to be ranked. 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/CAL2019_figRanks_mue_levels.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/CAL2019_figRanks_q95hd_levels.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/CAL2019_figRanks_msip_levels.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/CAL2019_figRanks_mue_levels_D4.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/CAL2019_figRanks_q95hd_levels_D4.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/CAL2019_figRanks_msip_levels_D4.png - lyxscale 20 - width 32col% - BoundingBox 200bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:caldewheyerRPmat" - -\end_inset - -Case CAL2019: ranking probability matrices for (a-c) DFT-D3 and DFT-D4-ATM - methods, and (d-f) DFT-D4-ATM methods only. 
- -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -JEN2018 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Jensen2018" - -\end_inset - - -\end_layout - -\begin_layout Standard -This dataset contains non-covalent interaction energies estimated by M06-L - with six different basis sets for 66 systems in the S66 dataset -\begin_inset CommandInset citation -LatexCommand cite -key "Rezac2011,Rezac2011_Erratum" -literal "false" - -\end_inset - -. - This is a part of the results reported in Table 8 of a recent article by - Jensen -\begin_inset CommandInset citation -LatexCommand cite -key "Jensen2018" -literal "false" - -\end_inset - -, and available as Supplementary Information to this article. - This dataset was used by Jensen to study the impact of error cancellations - when using standard or optimized medium-sized basis sets. - Six basis sets are considered (pop2 = 6-31G(d,p), pop3 = 6-311G(2df,2pd), - pcseg-1, pcseg-4, pop2-opt and pcseg1-opt), where the "-opt" ones have - optimized contraction coefficients with respect to the reference data. -\end_layout - -\begin_layout Paragraph -Correlations. -\end_layout - -\begin_layout Standard -The error sets of the "-opt" methods are practically uncorrelated to the - other sets (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:jensenCorrmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)), and in the remaining methods, pcseg4 errors are anti-correlated with - the other ones. - A striking feature of this dataset is that this negative correlation persists - for the MUE, contradicting the trends observed in Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Covariance-of-scores" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. 
- Otherwise, the correlations globally weaken for -\begin_inset Formula $Q_{95}$ -\end_inset - -, except for the pop2/pop3 and pcseg1/pcseg1-opt cases, for which the correlatio -n is stronger than the one between the error sets. - -\begin_inset Float figure -placement tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/JEN2018_CorrMat_Errors_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 180bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/JEN2018_CorrMat_MUE_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 180bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/JEN2018_CorrMat_Q95_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 180bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:jensenCorrmat" - -\end_inset - -Case JEN2018 - rank correlation matrices: (a) Errors; (b) MUE; (c) -\begin_inset Formula $Q_{95}$ -\end_inset - -. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Statistics. -\end_layout - -\begin_layout Standard -The statistics in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:jensen" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - show the strong impact of basis-set optimization; both optimized basis - sets provide comparable results for the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -. - All statistics show that the ranking between both "-opt" methods is not - strict.
-\begin_inset Float table -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center - -\size small -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kJ/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kJ/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kJ/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kJ/mol -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -pop2 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.9(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -7.2(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.35(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.77(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --2.9(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.8(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -pop3 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.4(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -6.4(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - 
-\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.47(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.74(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --2.3(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.8(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -pcseg1 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.5(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -5.6(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.42(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.76(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --2.3(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.9(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -pcseg4 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.5(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4.8(4) -\end_layout - -\end_inset - - 
-\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.33(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.89(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.8(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.6(2) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -pop2-opt -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -1.06(10) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.05 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -2.6(2) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.24 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.67(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.62(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.66(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.65(9) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -pcseg1-opt -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.90(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -2.5(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.76(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:jensen" - -\end_inset - -Case JEN2018 - absolute error statistics: inversion probabilities and SIP - statistics for comparison with the method of smallest MUE (pcseg1-opt). - The best scores and the values for which -\begin_inset Formula $(p_{g}=2P_{inv})>0.05$ -\end_inset - - are in boldface. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -They both also stand out by their MSIP, with a slight advantage for pcseg1-opt. - Once again, the importance of error cancellations stands out through the - medium values of the SIP of pcseg1-opt over the other cases. - The strongest improvement is 0.9 over pcseg4, the smallest 0.6 over pop2-opt. - The plots in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:jensen1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - illustrate these features. 
- The SIP matrix clearly shows the medium supremacy of the optimized basis - sets, and a slight advantage of pcseg1-opt over pop2-opt. - The major gain when going from pop2 to pop2-opt is visible in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:jensen1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(c) where the medium SIP (~0.7) is compensated by the very small mean loss - (0.6 -\begin_inset space \thinspace{} -\end_inset - -kJ/mol). - In contrast, Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:jensen1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(d) shows that the improvement of pcseg1-opt over pop2-opt is marginal, - with SIP values close to the neutral value (0.5) and symmetrical MG and - ML values. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/JEN2018_compareECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/JEN2018_SIPHeatmap.png - lyxscale 25 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/JEN2018_deltaECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/JEN2018_deltaECDF2.png - lyxscale 25 - width 32text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:jensen1" - -\end_inset - -Case JEN2018: (a) ECDF of the absolute errors; (b) SIP matrix; (c,d) ECDF - of the difference of absolute errors of pop2 and pcseg1-opt with
respect - to pop2-opt (see Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:Delta-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - for details). - The orange bar represents a chemical accuracy of 1 -\begin_inset space \thinspace{} -\end_inset - -kJ/mol. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Ranking. -\end_layout - -\begin_layout Standard -The leading position of the "-opt" methods is solid and confirmed by our - three scores (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:jensenRanking" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/JEN2018_figRanks_mue_levels.png - lyxscale 20 - width 32col% - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/JEN2018_figRanks_q95hd_levels.png - lyxscale 20 - width 32col% - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/JEN2018_figRanks_msip_levels.png - lyxscale 20 - width 32col% - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:jensenRanking" - -\end_inset - -Case JEN2018: ranking probability matrices. 
- -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -DAS2019 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Das2019" - -\end_inset - - -\end_layout - -\begin_layout Standard -A set of 24 dielectric constants for 3D metal oxides has been reported by - Das -\emph on -et al. - -\emph default - -\begin_inset CommandInset citation -LatexCommand cite -key "Das2019" -literal "false" - -\end_inset - - in their Table 3. - One of the experimental values being unknown, this limits the dataset to - 23 values. - Experimental uncertainties are not specified. - The predictions by six DFAs are reported, three global hybrids (PBE0, B3LYP - and DD-B3LYP) and three range-separated hybrids (SC-BLYP, DD-SCBLYP and - DD-CAM-B3LYP). - This is a small dataset, below the standards required for low type I errors - (false positive) in the comparison of MUE ( -\begin_inset Formula $N>30$ -\end_inset - -) and -\begin_inset Formula $Q_{95}$ -\end_inset - - ( -\begin_inset Formula $N>60$ -\end_inset - -) (Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Type-I-error" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). -\end_layout - -\begin_layout Paragraph -Correlations. -\end_layout - -\begin_layout Standard -The correlation matrices of the errors, MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - have uniformly strongly positive elements (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:dasCorrmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - --top). 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status collapsed - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/DAS2019_CorrMat_Errors_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 200bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/DAS2019_CorrMat_MUE_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 200bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/DAS2019_CorrMat_Q95_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 200bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/DAS2019_CorrMat_Errors_Spearman_Pruned.png - lyxscale 20 - width 32text% - BoundingBox 200bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/DAS2019_CorrMat_MUE_Spearman_Pruned.png - lyxscale 20 - width 32text% - BoundingBox 200bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/DAS2019_CorrMat_Q95_Spearman_Pruned.png - lyxscale 20 - width 32text% - BoundingBox 200bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:dasCorrmat" - -\end_inset - -Case DAS2019 - rank correlation matrices: (a-c) original data 
set ( -\begin_inset Formula $N=23$ -\end_inset - -); (d-f) after removal of two outliers ( -\begin_inset Formula $N=21$ -\end_inset - -). - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - This is an unusual situation when compared to the previous cases. - Knowing that correlation coefficients are sensitive to outliers (even if - rank correlations are a little more robust), we explored the dataset for - outliers. - A parallel plot (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:dasCorrmat-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) of the scaled and centered error sets enables one to identify systems which - deviate significantly from the core distribution for all methods (global - outliers). - Two such systems exist for all methods: -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -ce{BiVO4} -\end_layout - -\end_inset - - and -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -ce{Cu2O} -\end_layout - -\end_inset - -. - After removal of these two points, the correlation matrix for the errors - is slightly relaxed (the smallest correlation coefficient decreases from - 0.81 to 0.74), but those for MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - are visibly more affected (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:dasCorrmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - --bottom). - In fact, the parallel plot reflects the strong correlations between all - error sets (many quasi-parallel horizontal lines), except for DD-CAM-B3LYP. - The pruned dataset ( -\begin_inset Formula $N=21$ -\end_inset - -) is used in the following.
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status collapsed - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/DAS2019_ParPlot.png - lyxscale 20 - width 40text% - BoundingBox 50bp 0bp 1100bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:dasCorrmat-1" - -\end_inset - -Case DAS2019: parallel plot of scaled and centered error sets, used to identify - global outliers. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Statistics. -\end_layout - -\begin_layout Standard -Considering the small size of the sample, few clear-cut conclusions are - possible. - Only DD-CAM-B3LYP stands out significantly, by its MUE, Q95 and - MSIP values (Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:das" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - At the opposite end, although its MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - values are not distinguishable from those of PBE0, B3LYP, SC-BLYP and DD-SC-BLY -P, DD-B3LYP is the worst performer of the group based on the SIP statistics.
- -\begin_inset Float table -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center - -\size small -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -a.u. -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -a.u. 
-\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -a.u. -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -a.u. -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -PBE0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.66(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.6(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.47(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.76(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.44(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.19(4) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -B3LYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.61(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset 
- - -\begin_inset Text - -\begin_layout Plain Layout -1.4(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.49(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.76(10) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.38(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.21(6) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -DD-B3LYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.70(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.30(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.19(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.90(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.41(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.4(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -SC-BLYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.58(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - 
-\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.3(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.62(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.76(9) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.36(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.22(7) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -DD-SC-BLYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.68(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.23(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.29(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.90(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.39(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.4(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -DD-CAM-B3LYP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series 
bold -0.36(6) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.83(7) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.82(8) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:das" - -\end_inset - -Case DAS2019 - absolute error statistics for the pruned dataset ( -\begin_inset Formula $N=21$ -\end_inset - -): inversion probabilities and SIP statistics for comparison with the DFA - of smallest MUE (DD-CAM-B3LYP). - The best scores are in boldface. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -The latter two methods are clearly identifiable in the SIP matrix (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:das" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)), with a full reddish line for DD-CAM-B3LYP, and a full blueish line - for DD-B3LYP. - The impact of the small set size on this conclusion is illustrated in Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:das" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b,c), where the ECDFs of the differences of absolute errors are plotted - for DD-CAM-B3LYP -\emph on -vs -\emph default -. - B3LYP and DD-B3LYP -\emph on -vs -\emph default -. - B3LYP. - Despite being very large, the error bars on the statistics still allow - these conclusions to be validated. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/DAS2019_SIPHeatmap_Pruned.png - lyxscale 25 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/DAS2019_deltaECDF_Pruned.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/DAS2019_deltaECDF2_Pruned.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:das" - -\end_inset - -Case DAS2019: (a) SIP matrix; (b) ECDF of the difference of absolute errors - of methods DD-CAM-B3LYP and B3LYP; (c) idem for DD-B3LYP and B3LYP (see - Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:Delta-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - for details). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Ranking. -\end_layout - -\begin_layout Standard -All ranking matrices confirm a solid leading place for DD-CAM-B3LYP (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:dasRanking-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). 
- The MUE and MSIP rankings would then favor SC-BLYP and B3LYP, in disagreement - with the -\begin_inset Formula $Q_{95}$ -\end_inset - - ranking, for which the three DD-X methods have leading ranks. - An example of a -\begin_inset Formula $N'$ -\end_inset - --out of- -\begin_inset Formula $N$ -\end_inset - - bootstrap ( -\begin_inset Formula $N'=N/3$ -\end_inset - -) is shown on the bottom row. - The uncertainties are slightly larger, notably for the -\begin_inset Formula $Q_{95}$ -\end_inset - - ranks above the first, but the main features are mostly preserved. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/DAS2019_figRanks_mue_levels_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 300bp 450bp 1800bp 1700bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/DAS2019_figRanks_q95hd_levels_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 300bp 450bp 1800bp 1700bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/DAS2019_figRanks_msip_levels_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 300bp 450bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/DAS2019_figRanks_mue_levels_Pruned_M.png - lyxscale 20 - width 32col% - BoundingBox 300bp 450bp 1800bp 1700bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/DAS2019_figRanks_q95hd_levels_Pruned_M.png - lyxscale 20 - width 32col% - BoundingBox 300bp 450bp 1800bp 1700bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/DAS2019_figRanks_msip_levels_Pruned_M.png - lyxscale 20 - width 32col% - BoundingBox 300bp 450bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - 
-\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:dasRanking-1" - -\end_inset - -Case DAS2019 - ranking probability matrices: (a-c) -\begin_inset Formula $N$ -\end_inset - --out -\begin_inset space \thinspace{} -\end_inset - -of- -\begin_inset Formula $N$ -\end_inset - - bootstrap; (d-f) -\begin_inset Formula $N/3$ -\end_inset - --out -\begin_inset space \thinspace{} -\end_inset - -of- -\begin_inset Formula $N$ -\end_inset - - bootstrap. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -THA2015 / WU2015 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Thakkar2015" - -\end_inset - - -\end_layout - -\begin_layout Standard -Thakkar -\emph on -et al. - -\emph default - -\begin_inset CommandInset citation -LatexCommand cite -key "Thakkar2015" -literal "false" - -\end_inset - - compiled a database of polarizabilities for 135 molecules, from triatomics - to 26-atom systems. - The experimental data are given with their uncertainty, and computational - results are provided for 7 methods. - Dataset THA2015 for our study was extracted from Tables II-IV of the reference - article. - The raw errors present a dispersion increasing with the polarizability, - hence relative errors are used in the reference article and this study. - -\end_layout - -\begin_layout Standard -The relative uncertainties for the reference experimental data cover a large - range, from 0.09 -\begin_inset space \thinspace{} -\end_inset - -% to 12.4 -\begin_inset space \thinspace{} -\end_inset - -%, with a median value of 1.7 -\begin_inset space \thinspace{} -\end_inset - -%. - The authors identified 8 outliers, and a total of 32 systems in need of - further experimental study. - The outliers do not contain the points with the extreme uncertainties, - so that, even after removal of the 32 problematic systems, the range of - relative uncertainties stays the same. 
- The dispersion of uncertainties would certainly justify the use of weighted - statistics. - This was not the choice of Thakkar -\emph on -et al. -\emph default -, and we proceed with unweighted statistics, keeping in mind that the results - might be influenced by reference data errors instead of model errors. -\end_layout - -\begin_layout Standard -In a complementary study, Wu -\emph on -et al. - -\emph default - -\begin_inset CommandInset citation -LatexCommand cite -key "Wu2015b" -literal "false" - -\end_inset - - calculated polarizabilities for a set of 145 molecules with HF, MP2, CCSD(T) - and 34 DFAs. - In this study, CCSD(T) was used as reference to evaluate the other methods. - In the following, we select the subset of 7 methods common to both datasets - (WU2015). - This enables us to study the impact of the reference data (experimental - -\emph on -vs. - -\emph default - calculated) on the correlation and ranking matrices. - -\end_layout - -\begin_layout Paragraph -Correlations. -\end_layout - -\begin_layout Standard -The Pearson correlation matrix of the error sets (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkar2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)) is uniformly strongly positive. - The smallest CC value is 0.8. - To appreciate the role of data points with large deviations (outliers) - in these strong correlations, we removed a set of 8 outliers identified - by Thakkar -\emph on -et al. -\emph default - -\begin_inset CommandInset citation -LatexCommand cite -key "Thakkar2015" -literal "false" - -\end_inset - - (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkar2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b)). - Most of the correlations weaken notably. - For comparison, the rank correlation matrix was calculated for the full - dataset (Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkar2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(c)). - This matrix is very similar to the one with outliers removed, illustrating - the better resilience of rank correlations to outliers. - Finally, the errors, MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - rank correlation matrices were estimated on the pruned ( -\begin_inset Formula $N=127$ -\end_inset - -) dataset (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkar2" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(d-f)). - The structure of the errors correlation matrix is transferred to the statistics -, with attenuated correlation intensities. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/THA2015_CorrMat_Errors.png - lyxscale 20 - width 32col% - BoundingBox 180bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/THA2015_CorrMat_Errors_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 180bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/THA2015_CorrMat_Errors_Spearman.png - lyxscale 20 - width 32col% - BoundingBox 180bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/THA2015_CorrMat_Errors_Spearman_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 180bp 250bp 
1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/THA2015_CorrMat_MUE_Spearman_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 180bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/THA2015_CorrMat_Q95_Spearman_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 180bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:thakkar2" - -\end_inset - -Case THA2015 - correlation matrix: (a) Pearson correlation of the full data - set ( -\begin_inset Formula $N=135$ -\end_inset - -); (b) Pearson correlation of the pruned dataset ( -\begin_inset Formula $N=127$ -\end_inset - -); (c) Spearman/rank correlation of the full data set; (d): Errors rank - correlation; (e): MUE rank correlation; (f) -\begin_inset Formula $Q_{95}$ -\end_inset - - rank correlation. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -The error, MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - rank correlation matrices were also calculated for the full WU2015 dataset - (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkar2-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - In the absence of reference data uncertainties, MP2 errors are now weakly - anticorrelated to the other error sets, while all DFAs remain positively - correlated. 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status collapsed - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/WU2015_CorrMat_Errors_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 180bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/WU2015_CorrMat_MUE_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 180bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/WU2015_CorrMat_Q95_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 180bp 250bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:thakkar2-1" - -\end_inset - -Case WU2015 - rank correlation matrix: (a) Errors; (b) MUE; (c) -\begin_inset Formula $Q_{95}$ -\end_inset - -. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -The differences between both sets of correlation matrices, notably when - MP2 is concerned, might be due in a large part to the presence of large - experimental errors in the THA2015 dataset. -\end_layout - -\begin_layout Paragraph -Statistics. - -\end_layout - -\begin_layout Standard -The values of MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - for the full THA2015 dataset are reported in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:thakkar" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - The MUE values agree with those of the reference article, but the uncertainty - bears on the second digit, showing that a third digit is essentially irrelevant. 
- The analysis of -\begin_inset Formula $P_{inv}$ -\end_inset - - for the MUE leads us to conclude that there is a group of four methods - (M11, M06-2X, LC- -\begin_inset Formula $\tau$ -\end_inset - -HCTH and MP2) with similar performances, which is confirmed by the comparison - of their empirical cumulative distribution functions -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - - (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkar1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - These ECDFs overlap over the whole range. - Besides, these methods cannot be discriminated on the basis of their -\begin_inset Formula $Q_{95}$ -\end_inset - - values, as it appears that all values are indiscernible. - These conclusions are unchanged when one removes the 8 outliers identified - by Thakkar -\emph on -et al. - -\emph default -(not shown). - -\begin_inset Float table -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset 
Text - -\begin_layout Plain Layout -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -% -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -% -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -% -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -% -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -M11 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -3.1(3) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.34 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -10(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.58(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.47(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.4(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.16(10) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -M06-2X -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -3.2(3) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.09 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -10(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.50 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.57(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.53(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.2(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.0(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $\omega$ -\end_inset - -B97 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.3(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text 
- -\begin_layout Plain Layout - -\series bold -11(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.21 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.53(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.59(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --0.94(7) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.72(7) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -LC- -\begin_inset Formula $\tau$ -\end_inset - -HCTH -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -3.0(3) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -10(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.30 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.59(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -HISS -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.8(3) -\end_layout - -\end_inset - 
- -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -10(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.38 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.34(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.72(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.62(10) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.5(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -LC- -\begin_inset Formula $\omega$ -\end_inset - -PBE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3.9(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -11(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.25 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.31(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.78(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.39(8) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.2(1) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -MP2 -\end_layout - -\end_inset - - 
-\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -3.2(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.22 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -11(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.34 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.56(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.45(4) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.3(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.8(1) -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:thakkar" - -\end_inset - -Case THA2015 - absolute error statistics for the full dataset ( -\begin_inset Formula $N=135$ -\end_inset - -): inversion probabilities and SIP statistics for comparison with the DFA - of smallest MUE (LC- -\begin_inset Formula $\tau$ -\end_inset - -HCTH), except for -\begin_inset Formula $Q_{95}$ -\end_inset - - inversion probability, where the reference is the DFA with smallest -\begin_inset Formula $Q_{95}$ -\end_inset - -. - The best scores and the values for which -\begin_inset Formula $(p_{g}=2P_{inv})>0.05$ -\end_inset - - are in boldface. 
-\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/THA2015_compareECDF_Pruned.png - lyxscale 25 - width 32col% - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/THA2015_compareECDF2_Pruned.png - lyxscale 25 - width 32col% - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:thakkar1" - -\end_inset - -Case THA2015 - ECDFs of absolute relative errors: (a) methods with smallest, - indiscernible, MUE values; (b) other methods. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -The SIP matrix (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkarSIP" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)) for the THA2015 dataset reveals a leading group of four methods identical - to those identified above. - When passing to WU2015 (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkarSIP" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b)), there is a better discrimination between methods, and MP2 presents - SIP values over all the other methods. 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/THA2015_SIPHeatmap_Pruned.png - lyxscale 25 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/WU2015_SIPHeatmap.png - lyxscale 25 - width 32text% - BoundingBox 100bp 0bp 1300bp 1200bp - clip - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:thakkarSIP" - -\end_inset - -SIP matrix: (a) case THA2015 ( -\begin_inset Formula $N=127$ -\end_inset - -); (b) case WU2015. - The methods are sorted by decreasing MSIP value. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Ranking. -\end_layout - -\begin_layout Standard -The ranking matrices are plotted in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkarRPmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - The top row concerns dataset THA2015. - The ranking probability matrices for the MUE confirm the problem seen above - for the four best methods. - It shows also that the rank of MP2 is quite ill-defined. - For -\begin_inset Formula $Q_{95}$ -\end_inset - -, as expected, any ranking seems illusory. - The same matrices have been estimated after the removal of 8 outliers defined - above (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkarRPmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - --middle row). - This has a negligible impact on the MUE ranking, but fully scrambles the - -\begin_inset Formula $Q_{95}$ -\end_inset - - one, M11 passing from the first to the last place, MP2 from the 8th to - the first, and so on. 
- In fact, ill-defined ranking matrices can be expected to be very sensitive - to any alteration of the dataset. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/THA2015_figRanks_mue_levels.png - lyxscale 20 - width 32col% - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/THA2015_figRanks_q95hd_levels.png - lyxscale 20 - width 32col% - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/THA2015_figRanks_msip_levels.png - lyxscale 20 - width 32col% - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/THA2015_figRanks_mue_levels_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/THA2015_figRanks_q95hd_levels_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/THA2015_figRanks_msip_levels_Pruned.png - lyxscale 20 - width 32col% - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/WU2015_figRanks_mue_levels.png - 
lyxscale 20 - width 32col% - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/WU2015_figRanks_q95hd_levels.png - lyxscale 20 - width 32col% - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Graphics - filename ../results/figs/WU2015_figRanks_msip_levels.png - lyxscale 20 - width 32col% - BoundingBox 300bp 400bp 1800bp 1700bp - clip - -\end_inset - - -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:thakkarRPmat" - -\end_inset - -Ranking probability matrices: (a-c) case THA2015 full dataset ( -\begin_inset Formula $N=135$ -\end_inset - -); (d-f) case THA2015 dataset pruned from 8 outliers ( -\begin_inset Formula $N=127$ -\end_inset - -); (g-i) case WU2015 ( -\begin_inset Formula $N=145$ -\end_inset - -). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -When considering the WU2015 dataset, the ranking matrices (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:thakkarRPmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - --bottom row) show much less dispersion, underlining the deleterious role - of experimental errors on ranking. - Note that there remains a notable uncertainty to rank -\begin_inset Formula $\omega$ -\end_inset - -B97, M11, M06-2X and LC- -\begin_inset Formula $\tau$ -\end_inset - -HCTH using -\begin_inset Formula $Q_{95}$ -\end_inset - -. 
- -\end_layout - -\begin_layout Standard -Depending on the reference dataset (experimental or CCSD(T)) one obtains - different rankings: LC- -\begin_inset Formula $\tau$ -\end_inset - -HCTH seems a better option to predict experimental values, whereas MP2 is - a better proxy for CCSD(T) calculations. - -\end_layout - -\begin_layout Subsection -ZAS2019 -\begin_inset CommandInset label -LatexCommand label -name "subsec:Zas2019" - -\end_inset - - -\end_layout - -\begin_layout Standard -The effective atomization energies ( -\begin_inset Formula $E^{*}$ -\end_inset - -) for the QM7b dataset -\begin_inset CommandInset citation -LatexCommand cite -key "Montavon2013" -literal "false" - -\end_inset - -, for 7211 molecules up to 7 heavy atoms (C, N, O, S or Cl) are available - for several basis sets (STO-3g, 6-31g, and cc-pvdz), three quantum chemistry - methods (HF, MP2 and CCSD(T)) and four machine learning algorithms (CM-L1, - CM-L2, SLATM-L1 and SLATM-L2). - The data have been provided by Zaspel -\emph on -et al -\emph default -. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset citation -LatexCommand cite -key "Zaspel2019" -literal "false" - -\end_inset - -. - The machine learning methods have been trained over a random sample of - 1000 CCSD(T) energies (learning set), and the test set contains the prediction - errors for the 6211 remaining systems -\begin_inset CommandInset citation -LatexCommand cite -key "Zaspel2019" -literal "false" - -\end_inset - -. - We retain here only HF, MP2 and SLATM-L2 and compare their ability to predict - CCSD(T) values. -\end_layout - -\begin_layout Paragraph -Correlations. -\end_layout - -\begin_layout Standard -The error sets are essentially uncorrelated (Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:zasCorrmat" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), whereas small positive correlations can be noted for the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -. - In this problem, it would therefore be possible to ignore correlations - when computing -\begin_inset Formula $P_{inv}$ -\end_inset - -. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/ZAS2019_CorrMat_Errors_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 200bp 300bp 1800bp 1800bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/ZAS2019_CorrMat_MUE_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 200bp 300bp 1800bp 1800bp - clip - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/ZAS2019_CorrMat_Q95_Spearman.png - lyxscale 20 - width 32text% - BoundingBox 200bp 300bp 1800bp 1800bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:zasCorrmat" - -\end_inset - -Case ZAS2019 - rank correlation matrices: (a) Errors; (b) MUE; (c) -\begin_inset Formula $Q_{95}$ -\end_inset - -. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Paragraph -Statistics. -\end_layout - -\begin_layout Standard -The values are reported in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:zas" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - There is a contrast between the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -. 
- SLATM-L2 and MP2 have close MUE values, with an above-threshold -\begin_inset Formula $p$ -\end_inset - --value ( -\begin_inset Formula $p_{g}\simeq2P_{inv}=0.06$ -\end_inset - -), and a slight advantage for SLATM-L2. - However, MP2 has a significantly smaller -\begin_inset Formula $Q_{95}$ -\end_inset - -. - As seen on the absolute errors ECDFs (Fig. -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:zas" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)), SLATM-L2 has indeed a pronounced tail of large errors. - -\begin_inset Float table -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center - -\size small -\begin_inset Tabular - - - - - - - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Methods -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $P_{inv}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -SIP -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MG -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -ML -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -kcal/mol -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -HF -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.38(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -6.1(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.283(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.743(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --2.03(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.50(5) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout 
Plain Layout -MP2 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -1.31(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.03 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -3.35(5) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.538(5) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.613(6) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout --1.08(2) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.58(5) -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -SLATM-L2 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -1.26(3) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4.7(1) -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.00 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\series bold -0.678(5) -\series default - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout 
Plain Layout -- -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -- -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "tab:zas" - -\end_inset - - Case ZAS2019 - absolute error statistics: inversion probabilities and SIP - statistics for comparison with the method of smallest MUE (SLATM-L2), except - for -\begin_inset Formula $Q_{95}$ -\end_inset - - inversion probability, where the reference is the method with smallest -\begin_inset Formula $Q_{95}$ -\end_inset - - (MP2). - The best scores and the values for which -\begin_inset Formula $(p_{g}=2P_{inv})>0.05$ -\end_inset - - are in boldface. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -This case emphasizes the fact that similar values of the MUE can result - by chance from very distinct error distributions, and that no conclusion - should be drawn on the basis of MUE alone. - -\end_layout - -\begin_layout Paragraph -SIP analysis. -\end_layout - -\begin_layout Standard -The SIP matrix (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:zas" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b)) shows that SLATM-L2 presents a notable improvement probability ( -\begin_inset Formula $\sim$ -\end_inset - -0.75) over HF and a moderate one over MP2 ( -\begin_inset Formula $\sim0.61$ -\end_inset - -). - Even if SLATM-L2 has significantly better statistics than HF (Fig.
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:zas" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(c)), there remains a 25 -\begin_inset space \thinspace{} -\end_inset - -% chance that the latter provides smaller absolute errors. - In most case studies presented above, the mean gain was larger in absolute - value than the mean loss. - In the comparison between SLATM-L2 and MP2, one observes the opposite: - by choosing SLATM-L2 over MP2 (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:zas" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(d)), one has 61 -\begin_inset space \thinspace{} -\end_inset - -% chance to get better results, with a mean gain -\begin_inset Formula $\mathrm{MG}\simeq-1.1$ -\end_inset - - -\begin_inset space \thinspace{} -\end_inset - -kcal/mol, and 39 -\begin_inset space \thinspace{} -\end_inset - -% chance to deteriorate the MP2 values with a mean loss -\begin_inset Formula $\mathrm{ML}\simeq1.6$ -\end_inset - - -\begin_inset space \thinspace{} -\end_inset - -kcal/mol. - In agreement with the -\begin_inset Formula $Q_{95}$ -\end_inset - - analysis, this is due to the notable tail of large errors of SLATM-L2. 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/ZAS2019_compareECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/ZAS2019_SIPHeatmap.png - lyxscale 25 - width 32text% - BoundingBox 200bp 100bp 1300bp 1200bp - clip - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/ZAS2019_deltaECDF.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/ZAS2019_deltaECDF2.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:zas" - -\end_inset - -Case ZAS2019: (a) ECDF of the absolute errors; (b) SIP matrix; (c,d) ECDF - of the difference of absolute errors of HF (c) and MP2 (d) with respect - to SLATM-L2 (see Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:Delta-example" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - for details). - The orange band represents the chemical accuracy (1 -\begin_inset space \thinspace{} -\end_inset - -kcal/mol). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Section -Discussion -\end_layout - -\begin_layout Subsection -Extracting data from articles and supplementary material -\end_layout - -\begin_layout Standard -The raw data of benchmark studies are important assets for the community, - and their accessibility and reusability are very important for intercomparison - studies or the development of alternative statistical analyses, as performed - in this study.
- We found many benchmarking studies with practically inaccessible data, - failing the FAIR principles of Open Data -\begin_inset CommandInset citation -LatexCommand cite -key "Wilkinson2016" -literal "false" - -\end_inset - -. - Besides the trivial case of non-available data, we have stumbled on data - stored in complex databases and requiring non-trivial coding for their - extraction, or data stored in inappropriate formats, such as PDF (a page - description format), instead of recognized machine-readable data storage - formats, such as CSV tables. - -\end_layout - -\begin_layout Standard -Note that for some of the cases we gathered here, we were able to extract - data from PDF articles or SI files, but not without some difficulty, involving - several steps of manual operations. - Typical problems for the data extraction from tables in PDF documents are: - excessive numerical truncation, empty cells or complex table mapping, typograph -ical ( -\begin_inset Formula $-$ -\end_inset - -) instead of numerical (-) minus sign, rotated tables, compact notations - for uncertainty (either 123(4) or 123 -\begin_inset Formula $\pm$ -\end_inset - -4), bibliographical references attached to the data (generally processed - by extraction tools as spurious decimals)... - Most of these features preclude automated data extraction and require error-pro -ne human processing. -\end_layout - -\begin_layout Standard -So, unless the structure of the data is complex, and this should not be - the case for most benchmark studies, it is warmly recommended to use "flat" - numerical tables stored in an open format, such as CSV, and to avoid - putting more than one piece of information in each cell. - -\begin_inset Quotes eld -\end_inset - -Think Open, think FAIR !
-\begin_inset Quotes erd -\end_inset - - -\end_layout - -\begin_layout Subsection -Impact of dataset size -\end_layout - -\begin_layout Standard -The examples above have shown that dataset size impacts considerably the - ability to rank methods or to assert the impact of an improved method. - Size effect on the uncertainty of statistics is well known for the mean - value, and similar formulae can be derived for other statistics under normality - hypotheses. - However, the non-normality of error sets requires the use of numerical - methods, typically bootstrap sampling. - This enables to show how the usual benchmark statistics are affected by - sample size. - We have seen, for instance, that there is a notable probability to conclude - erroneously that two -\begin_inset Formula $Q_{95}$ -\end_inset - - values are different when they are not (type I errors or false positive) - if -\begin_inset Formula $N<60$ -\end_inset - - (Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Type-I-error" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - Moreover, for small datasets (a few tens of points), even the first digit - of the statistics is often affected by the uncertainty. - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -In such cases, ranking with the second or third digit of a statistics has - no sense, unless correlations are taken into account. - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -It is practically impossible to predict the dataset size required for a - stable and robust ranking. - Many factors other than set size are involved, notably the number and nature - of methods to be ranked. 
- When a lot of DFAs are compared, a hierarchical ranking is often performed, - for instance by first choosing the best method at each rung of the Jacob's - ladder, and then comparing these methods together -\begin_inset CommandInset citation -LatexCommand cite -key "Wu2015b" -literal "false" - -\end_inset - -. - This is one way to avoid the ranking uncertainty resulting from the direct - comparison of many methods, as observed in case CAL2019 (Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Caldeweyher2019" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). -\end_layout - -\begin_layout Subsection -The correlation matrix as a sanity check -\end_layout - -\begin_layout Standard -When we started this study, the correlation matrices were mainly intended - to illustrate the importance to consider correlation when comparing statistics. - When cumulating the case studies, it appeared that they contain pertinent - information on the quality of the benchmark dataset. - Considering that model errors in computational chemistry are mostly systematic, - one expects that error patterns over a dataset are characteristic of each - method or family of methods. - This seems to be a basic requirement for sound benchmarking studies. - For the errors correlation matrix, one should thus expect that closely - related methods produce similar error patterns and have strongly correlated - error sets, the correlation level decreasing with a -\begin_inset Quotes eld -\end_inset - -distance -\begin_inset Quotes erd -\end_inset - - between methods. - This is clearly illustrated in the case BOR2019, where the correlation - matrix clusters nicely into relevant DFA groups. - There seems also to be a clean decorrelation between MP2 or MP2-based methods - and DFAs (NAR2019, WU2015). 
- Similarly, one observes no correlation between HF, MP2 and a machine-learning - method calibrated on CCSD(T) in case ZAS2019. -\end_layout - -\begin_layout Standard - -\emph on -A contrario -\emph default -, when the methods set contains unrelated methods, a uniform strongly positive - correlation matrix should raise an alert. - We have seen in cases DAS2019 and THA2015 that outliers and/or large reference - data errors could dominate the correlation matrix and influence the benchmark - statistics. - Outliers common to all error sets (global outliers) can be efficiently - identified on a parallel plot, as shown in case DAS2019 (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:dasCorrmat-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - If the ranking study is to reflect the methods' performances, the curation - and possible pruning of the dataset from such global outliers is a necessary - preliminary step. - Otherwise, more complex statistical models have to be used to alleviate - the impact of those points (see Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Estimation-of-the" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - and several references -\begin_inset CommandInset citation -LatexCommand cite -key "Lejaeghere2014,Pernot2015,Proppe2017" -literal "false" - -\end_inset - -). -\end_layout - -\begin_layout Standard -Note that strongly correlated error sets do not imply similar performances. - For instance a set of linearly scaled harmonic vibrational frequencies - typically has better statistics than the unscaled set, whereas their correlatio -n coefficient is 1 because of the linear transformation between both sets.
- One should also recall that the correlation coefficient between calculated - and reference values that is still presented in some benchmarks is not - a reliable performance statistic -\begin_inset CommandInset citation -LatexCommand citep -key "Bland1986" -literal "false" - -\end_inset - -. - At most, it reveals a linear (Pearson) or monotonic (Spearman, Kendall) - association between both datasets, but not their proximity to the identity - line. - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Nevertheless, the notable difference ( 0.1- 0.2) between both correlation - estimators, for each method, points to the presence of outliers and/or a - non-linear relationship. -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -Impact of error sets correlation on ranking -\end_layout - -\begin_layout Standard -The correlation between error sets is partially or totally transferred to - statistics. - Except for linear transformations of the errors, where the transfer is - total, including the sign, one uses Monte Carlo methods to estimate this - transfer. - In many cases, such as for normal, Student's- -\begin_inset Formula $t$ -\end_inset - - or g-and-h error distributions -\begin_inset CommandInset citation -LatexCommand cite -key "Hoaglin1985" -literal "false" - -\end_inset - -, one observes that the correlation intensity mainly decreases when passing - from errors to MUE to -\begin_inset Formula $Q_{95}$ -\end_inset - -. - The case studies above show however that there are exceptions to this basic - trend. - We cannot presently rationalize the observed exceptions, but the main conclusio -n is that in most cases, one should not ignore correlations when comparing - statistics. -\end_layout - -\begin_layout Standard -However, unlike shown above for the error correlations, the visualization - of correlations between statistics might be of secondary interest.
- In fact, the paired samples bootstrap algorithms used in this study enable - to account directly for these correlations, without having to estimate - intermediate correlation matrices. - -\end_layout - -\begin_layout Standard -In a vast majority of the cases studied above, the correlation matrices - for MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - have positive coefficients. - These contribute to a reduction of the uncertainty on statistics differences, - with better discernibility between uncertain statistics. - Globally, positive correlations increase the robustness of rankings. -\end_layout - -\begin_layout Subsection -Systematic improvement analysis -\end_layout - -\begin_layout Standard -We introduced a new criterion, the systematic improvement probability (SIP), - which has the major advantage to be independent of the usual descriptive - statistics. - It is based on a sign statistic of the differences of absolute error pairs. - It is a useful complement to the MUE, as it enables to analyze a MUE difference. - All the case studies show that a decrease of MUE results from the balance - between gains and losses. - Two methods pairs were found, in cases PER2018 (Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Pernot2018" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) and CAL2019 (Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Caldeweyher2019" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), with SIP values reaching 0.95, close to the full systematic improvement. - But, we did not find in the studied cases a -\begin_inset Quotes eld -\end_inset - -best method -\begin_inset Quotes erd -\end_inset - - which fully improves the results of all lower rank methods. 
- Because of the well known error compensations in computational chemistry - methods -\begin_inset CommandInset citation -LatexCommand citep -key "Dunning2000" -literal "false" - -\end_inset - -, -\emph on -even physics-based improvements in DFAs do not lead to systematic improvements - for all systems -\emph default -. - We have seen for instance that for band gaps, mBJ degrades LDA predictions - for 16 -\begin_inset space \thinspace{} -\end_inset - -% of the systems (BOR2019). - In fact, there is often a non-negligible percentage of systems for which - a -\begin_inset Quotes eld -\end_inset - -bad -\begin_inset Quotes erd -\end_inset - - method is better than a -\begin_inset Quotes eld -\end_inset - -good -\begin_inset Quotes erd -\end_inset - - one, all across Jacob's ladder. - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -As long as the performances of computational chemistry methods rely on error - cancellations, physics-based improvements of DFAs can be seen as a kind - of statistical correction. -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -We have also introduced the mean SIP as a possible ranking statistic. - The main advantage of the MSIP is its independence from the usual summary - statistics; its main drawback is that it depends on the set of methods - being compared and it is not transferable to comparisons out of its definition - set. - -\end_layout - -\begin_layout Subsection -Ranking Probability Matrix -\end_layout - -\begin_layout Standard -The ranking probability matrix -\begin_inset Formula $\mathbf{P}_{r}$ -\end_inset - - provides a diagnostic on the robustness of the ranking by any statistic. - Our tests of MUE, -\begin_inset Formula $Q_{95}$ -\end_inset - - and MSIP rankings show that the dataset size and the number of methods - influence notably the ranking uncertainty. - Without any surprise, the closer the performances of a group of methods, - the more uncertain their ranking. 
- Depending on the datasets, the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - rankings might conflict and present different levels of robustness ( -\emph on -e.g. -\emph default -, case THA2015). - We would advise systematically publishing both of them, as they provide - complementary information. - -\end_layout - -\begin_layout Standard -In the various cases treated above, the rankings provided by the MSIP - most often conform to the MUE rankings and are as sensitive as the other - rankings to sampling uncertainty. - When ranking conflicts for the first places occur with the MUE, as was - observed in case PER2018, one gets alerted that the method with the lowest - MUE is not the one providing the largest proportion of small absolute errors. - Due to the non-normality of error distributions, such scenarios are to be - expected, as for inversions in MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - rankings. -\end_layout - -\begin_layout Subsection -Extension to composite datasets -\end_layout - -\begin_layout Standard -We considered here only datasets based on a single property. - Many modern benchmarks are based on composite datasets, involving weighting - schemes to incorporate data with different units -\begin_inset CommandInset citation -LatexCommand cite -key "Goerigk2017" -literal "false" - -\end_inset - -. - The applicability of the SIP to such datasets is straightforward, but the - mean gain and mean loss statistics, having dimensions, should become multivaria -te. - -\end_layout - -\begin_layout Standard -The estimation of -\begin_inset Formula $P_{inv}$ -\end_inset - - and ranking probability matrices for composite statistics ( -\emph on -e.g.
-\emph default -, WTMAD -\begin_inset CommandInset citation -LatexCommand cite -key "Goerigk2017" -literal "false" - -\end_inset - -) can use directly the pair-based bootstrap sampling algorithms described - in the present article, although care should be taken to avoid imbalance - between the various components of a dataset by using the so-called -\emph on -stratified -\emph default - bootstrap -\begin_inset CommandInset citation -LatexCommand cite -key "Hesterberg2015" -literal "false" - -\end_inset - -, preserving the cardinal number of each component in the generated sample. -\end_layout - -\begin_layout Section -Conclusion -\end_layout - -\begin_layout Standard -In this article, we proposed several tools to test the robustness of rankings - or comparisons of methods based on error statistics for non-exhaustive, - limited size datasets. - In order to avoid a normality hypothesis on the errors distributions, bootstrap --based methods were adopted, as suggested by Proppe and Reiher -\begin_inset CommandInset citation -LatexCommand cite -key "Proppe2017" -literal "false" - -\end_inset - - for the estimation of prediction uncertainty of DFT methods. - Our target statistics were the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -, but these tools are straightforwardly applicable to other statistics. -\end_layout - -\begin_layout Standard -Before any ranking, we have seen that the error sets correlation matrix - can be useful to appreciate the quality of a benchmark dataset. - To our knowledge, this subject has not previously been discussed, and benchmark -ing studies do not presently report such correlation matrices. - -\end_layout - -\begin_layout Standard -Then, the ranking probability matrix -\begin_inset Formula $\mathbf{P}_{r}$ -\end_inset - - for a chosen statistic provides a clear diagnostic on the robustness of - the corresponding ranking. - The impact of dataset size and number of compared methods can be thoroughly - tested. 
- We encourage benchmark authors to provide adequate uncertainty estimations - and ranking probability matrices, which can be obtained with a negligible - overhead in computer time. - -\end_layout - -\begin_layout Standard -When considering pairs of methods, we generalized our previous definition - of the inversion probability -\begin_inset Formula $P_{inv}$ -\end_inset - - to account for correlations between statistics and relax a normal distributio -n hypothesis. - We also introduced the systematic improvement probability (SIP) which is - independent of other descriptive statistics. - We have seen that the use of MUE for ranking hides a complex interplay - between the genuine method improvement and the error cancellations inherent - to most computational chemistry methods. - In particular, we have shown how a difference in MUE is a balance between - gains and losses in absolute errors. - Estimation of the systematic improvement probability (SIP) and the mean - gain (MG) and loss (ML) statistics can help understand this balance, and - assess the risks of switching between two methods. - Only two of the showcased examples revealed a method which provides a (nearly) - full systematic improvement over one of its competitors. - Even when comparing an elaborate composite method such as G4MP2 to DFAs - one observes partial SIP values. - A pedagogical virtue of the SIP is to clearly show that computational chemistry - is a science of compromises. -\end_layout - -\begin_layout Standard -We considered here for simplicity raw error sets, from which no care has - been taken to remove systematic trends. - When this is possible, such trend corrections, often simply linear, will - provide much better generalizability of the summary statistics derived - from these error sets.
- Besides, this is a necessary step if one wishes to estimate the prediction - uncertainty of any method -\begin_inset CommandInset citation -LatexCommand cite -key "Lejaeghere2014,Pernot2015,Proppe2017" -literal "false" - -\end_inset - -, notably when dealing with non-uniform reference data uncertainties. -\end_layout - -\begin_layout Acknowledgments -The authors are grateful to Pr. - O. -\begin_inset space \thinspace{} -\end_inset - -A. -\begin_inset space \thinspace{} -\end_inset - -von -\begin_inset space ~ -\end_inset - -Lilienfeld for providing the datasets of case ZAS2019, and to Pr. - S. -\begin_inset space \thinspace{} -\end_inset - -Grimme for providing a corrected copy of Supplementary Information for case - CAL2019. -\end_layout - -\begin_layout Section* -Supplementary Information -\end_layout - -\begin_layout Standard -Datasets and R code to reproduce the results of the article are accessible - at -\begin_inset Flex URL -status open - -\begin_layout Plain Layout - -https://github.com/ppernot/SIP -\end_layout - -\end_inset - - or -\begin_inset Flex URL -status open - -\begin_layout Plain Layout - -http://doi.org/10.5281/zenodo.3678481 -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -\start_of_appendix -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -appendixpage -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Section -Estimation of the mean value and its uncertainty -\begin_inset CommandInset label -LatexCommand label -name "sec:Estimation-of-the" - -\end_inset - - -\end_layout - -\begin_layout Standard -Let us consider the mean (signed) value of the errors (MSE). 
- In absence of uncertainty, it is defined as -\begin_inset Formula -\begin{equation} -\overline{e}=\frac{1}{N}\sum_{i=1}^{N}e_{i} -\end{equation} - -\end_inset - -and its uncertainty (standard error) is estimated as -\begin_inset Formula -\begin{equation} -u(\overline{e})=\sqrt{\frac{s_{e}^{2}}{N}}\label{eq:uref} -\end{equation} - -\end_inset - -where -\begin_inset Formula $s_{e}^{2}$ -\end_inset - - is a sample-based estimator of the population variance -\begin_inset Formula -\begin{equation} -s_{e}^{2}=\frac{1}{N-1}\sum_{i=1}^{N}(e_{i}-\overline{e})^{2} -\end{equation} - -\end_inset - -Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:uref" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - gives the well-known dependence of the MSE uncertainty with the dataset - size for independent and identically distributed ( -\emph on -i.i.d.) -\emph default -errors, assuming a finite variance, which might exclude error sets with - heavy-tailed distributions, -\emph on -e.g. -\emph default -, Cauchy. - -\begin_inset Foot -status open - -\begin_layout Plain Layout -Note that -\begin_inset Formula $u(\overline{e})$ -\end_inset - - in Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:uref" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - does not account for the uncertainty on -\begin_inset Formula $s_{e}$ -\end_inset - -. - Taking this factor into account leads to a larger uncertainty, which can - be estimated as -\begin_inset Formula $u(\overline{e})=\sqrt{(N-1)/(N-3)}\thinspace s_{e}/\sqrt{N}$ -\end_inset - - -\begin_inset CommandInset citation -LatexCommand cite -key "Kacker2003" -literal "false" - -\end_inset - -. 
- This formula is based on the properties of the Student's- -\emph on -t -\emph default - distribution -\begin_inset CommandInset citation -LatexCommand cite -key "Evans2000" -literal "false" - -\end_inset - -. - The impact of the correction factor is notable only for very small datasets - (it is smaller than 3 -\begin_inset space \thinspace{} -\end_inset - -% for -\begin_inset Formula $N\ge30$ -\end_inset - -), and we will consider the standard formula. -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -If uncertainty on errors -\begin_inset Formula $u(e_{i})$ -\end_inset - - is negligible, -\begin_inset Formula $s_{e}$ -\end_inset - - is an estimation of the standard deviation of the errors distribution -\begin_inset Formula $\sigma$ -\end_inset - -, which represents the dispersion of model errors. - If the reference data are uncertain, -\begin_inset Formula $s_{e}$ -\end_inset - - quantifies a dispersion due to both model errors and reference data uncertainty. - In consequence, it overestimates the dispersion of model errors, and specific - models have to be designed if one wishes to estimate this specific contribution - -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,Proppe2017" -literal "false" - -\end_inset - -. - This points to the necessity of using accurate reference data if the benchmark - based on standard statistics is to reflect the properties of the studied - methods.
-\end_layout - -\begin_layout Standard -To be more specific, in the presence of uncertainty on errors, the weighted - mean is the maximum likelihood estimator of the distribution mean under - normality assumptions -\begin_inset CommandInset citation -LatexCommand cite -key "Bevington1992" -literal "false" - -\end_inset - - -\begin_inset Formula -\begin{align} -\overline{e} & =\sum_{i=1}^{N}w_{i}e_{i}\\ -w_{i} & =\frac{u(e_{i})^{-2}}{\sum_{j=1}^{N}u(e_{j})^{-2}}\label{eq:wRefUnc} -\end{align} - -\end_inset - -giving less weight to the more uncertain data. - Direct application of the combination of variances to this expression leads - to -\begin_inset CommandInset citation -LatexCommand cite -key "Bevington1992" -literal "false" - -\end_inset - - -\begin_inset Formula -\begin{equation} -u(\overline{e})^{2}=\frac{1}{\sum_{j=1}^{N}u(e_{j})^{-2}} -\end{equation} - -\end_inset - -Note that in the case of identical uncertainty for all data, one recovers - the expression for the unweighted case (Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:uref" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). - -\end_layout - -\begin_layout Standard -The validity of this estimation has to be tested by computing the weighted - chi-squared -\begin_inset Formula -\begin{equation} -\chi_{w}^{2}=\sum_{i}\frac{(e_{i}-\overline{e})^{2}}{u(e_{i})^{2}}\label{eq:Birge} -\end{equation} - -\end_inset - -If the errors on the reference data are assumed to be normally distributed, - -\begin_inset Formula $\chi_{w}^{2}$ -\end_inset - - has a chi-squared distribution with -\begin_inset Formula $N-1$ -\end_inset - - degrees of freedom ( -\begin_inset Formula $\chi_{N-1}^{2}$ -\end_inset - -). 
- -\begin_inset Formula $\chi_{w}^{2}$ -\end_inset - - should be close to the mean of this distribution, -\begin_inset Formula $N-1$ -\end_inset - -, and lie within its 95 -\begin_inset space \thinspace{} -\end_inset - -% high probability interval. - If -\begin_inset Formula $\chi_{w}^{2}$ -\end_inset - - is too small, the -\begin_inset Formula $u(e_{i})$ -\end_inset - - are over-estimated and should be reconsidered, or the benchmarked method - is over-fitting the data, which is unlikely, unless the method is parametric - and has been calibrated on this same dataset. - If -\begin_inset Formula $\chi_{w}^{2}$ -\end_inset - - is too large, there is an excess of variance in the -\begin_inset Formula $E_{M}$ -\end_inset - - error set -\begin_inset CommandInset citation -LatexCommand cite -key "Kacker2004,Rukhin2009,Rivier2014" -literal "false" - -\end_inset - -. - In the typical benchmarking of computational chemistry methods, this is - generally the case because of the extraneous dispersion due to model errors. - To ensure the statistical validity of the weighted mean and its uncertainty, - one has therefore to define a more complex error model, considering explicitly - the two sources of dispersion, and to redefine the weights, accounting - for the excess of variance and possible biases in the error sets -\begin_inset CommandInset citation -LatexCommand cite -key "Lejaeghere2014,Lejaeghere2014a,Pernot2015,DeWaele2016,Proppe2017" -literal "false" - -\end_inset - -. 
- -\end_layout - -\begin_layout Standard -If one stipulates that the dispersion of the errors is the combined effect - of model error and reference data uncertainty, one can redefine the weights - as -\begin_inset CommandInset citation -LatexCommand cite -key "Rukhin2009" -literal "false" - -\end_inset - - -\begin_inset Formula -\begin{equation} -w_{i}=\frac{\left(\sigma^{2}+u(e_{i})^{2}\right)^{-1}}{\sum_{j=1}^{N}\left(\sigma^{2}+u(e_{j})^{2}\right)^{-1}}\label{eq:weights-IRWLS} -\end{equation} - -\end_inset - -where -\begin_inset Formula $\sigma^{2}$ -\end_inset - - is the variance of model errors. - With these new weights, -\begin_inset Formula -\begin{equation} -u(\overline{e})^{2}=\frac{1}{\sum_{j=1}^{N}\left(\sigma^{2}+u(e_{j})^{2}\right)^{-1}}\label{eq:uwmean} -\end{equation} - -\end_inset - -converges properly to the standard limit when the reference data errors - become negligible before the model errors. - The model error variance -\begin_inset Formula $\sigma^{2}$ -\end_inset - - can be estimated by decomposing the total variance of the errors into the - variance of model errors plus the mean variance of the data (known as Cochran's - ANOVA estimate -\begin_inset CommandInset citation -LatexCommand cite -key "Kacker2004,Rivier2014" -literal "false" - -\end_inset - -) -\begin_inset Formula -\begin{equation} -\mathrm{var}(e)=\sigma^{2}+\frac{1}{N}\sum_{j=1}^{N}u(e_{j})^{2}\label{eq:dispmod} -\end{equation} - -\end_inset - -This variance analysis ensures that -\begin_inset Formula $\chi_{w}^{2}$ -\end_inset - - is correct. - Note that other reweighting schemes exist -\begin_inset CommandInset citation -LatexCommand cite -key "Kacker2004,Rivier2014" -literal "false" - -\end_inset - -, but Cochran's is the simplest. - Besides, all reweighting methods are iterative: -\begin_inset Formula $\sigma$ -\end_inset - - depends on -\begin_inset Formula $\overline{e}$ -\end_inset - -, which itself depends on -\begin_inset Formula $\sigma$ -\end_inset - -. 
-\end_layout - -\begin_layout Standard -If the dispersion of reference data uncertainties is small, -\emph on -i.e. -\emph default -, smaller than the model errors contribution, one can reasonably consider - that the weights are identical and that the unweighted mean can be used. - Formally, its uncertainty (Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:uwmean" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) depends on -\begin_inset Formula $\sigma$ -\end_inset - -, which can be directly estimated through Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:dispmod" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, but by construction, one will recover results given by Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:uref" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - -\end_layout - -\begin_layout Standard -One will therefore consider that, unless a large dispersion of reference - data uncertainty is observed, these uncertainties can be ignored in the - estimation of the mean and its standard error. - Otherwise, one should use the weighted mean with the standard uncertainty - estimate. -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -Note that the dispersion of model errors -\begin_inset Formula $\sigma$ -\end_inset - - is related to the model prediction uncertainty and is a score of interest - for the ranking of models -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,Pernot2018" -literal "false" - -\end_inset - -. 
- -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -An advanced modeling of uncertainty sources is crucial if one wishes a reliable - estimate of the MSE, and of the various uncertainty contributions -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015" -literal "false" - -\end_inset - -. - In standard benchmarking, the aim is mostly to compare methods, knowing - that the reference datasets are incomplete. - If reference data uncertainty plays a significant role – that would be - the case if data with very different uncertainty levels were aggregated - in the dataset – one might assume that its impact will be the same for - all methods to be compared. - The values of the dispersion statistics will be consistently overestimated - for all methods. - As long as one is not interested in the accurate estimation of the underlying - properties of the error distributions, such as the model prediction uncertainty - -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2015,Proppe2017" -literal "false" - -\end_inset - -, it is simpler to rely on unweighted schemes and properly curated datasets. -\end_layout - -\begin_layout Section -Numerical study of the correlation of nonlinear statistics -\begin_inset CommandInset label -LatexCommand label -name "sec:Covariance-of-scores" - -\end_inset - - -\end_layout - -\begin_layout Standard -To illustrate the transfer of correlation from errors sets -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - to their statistics, one assumes that they are described by a bivariate - distribution with prescribed correlation coefficient -\begin_inset Formula $\rho$ -\end_inset - -. 
- From this distribution, one generates random samples -\begin_inset Formula $E_{1}^{*}$ -\end_inset - - and -\begin_inset Formula $E_{2}^{*}$ -\end_inset - - and one estimates the statistics values -\begin_inset Formula $s_{1}^{*}=S(E_{1}^{*})$ -\end_inset - - and -\begin_inset Formula $s_{2}^{*}=S(E_{2}^{*})$ -\end_inset - -. - -\begin_inset Formula $\mathrm{cor}(s_{1},s_{2})$ -\end_inset - - is finally estimated from -\begin_inset Formula $s_{1}^{*}$ -\end_inset - - and -\begin_inset Formula $s_{2}^{*}$ -\end_inset - - samples. - -\end_layout - -\begin_layout Standard -The error sets correlation coefficient -\begin_inset Formula $\rho$ -\end_inset - - is varied between -1 and 1, and the resulting correlation coefficients - are estimated for the MSE, MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - statistics. - The dataset size is -\begin_inset Formula $N=100$ -\end_inset - - and Monte Carlo samples size is -\begin_inset Formula $M=10^{3}$ -\end_inset - - . - -\end_layout - -\begin_layout Standard -The results for four representative cases of the g-and-h distribution used - by Wilcox and Erceg-Hurn -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - - (Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:The-g-and-h-distribution" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) of error sets are reported in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:corrScore" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a-d). - In this example, both error sets -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - have the same distribution with unit variance, only their correlation varies. 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/AppB_corrScores_g_0h_0.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/AppB_corrScores_g_0h_0.2.png - lyxscale 25 - width 32text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/AppB_corrScores_g_0.2h_0.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/AppB_corrScores_g_0.2h_0.2.png - lyxscale 25 - width 32text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/AppB_corrScoresNormDec.png - lyxscale 25 - width 32text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/AppB_corrScoresStudDec.png - lyxscale 25 - width 32text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:corrScore" - -\end_inset - -Correlation coefficients -\begin_inset Formula $\mathrm{cor}(s_{1},s_{2})$ -\end_inset - - of statistics ( -\begin_inset Formula $S=$ -\end_inset - - -\begin_inset space \thinspace{} -\end_inset - -MUE, MSE, -\begin_inset Formula $Q_{95})$ -\end_inset - - for two samples as a function of the correlation coefficient -\begin_inset Formula $\rho$ -\end_inset - - of these samples. - The error bars represent 95 -\begin_inset space \thinspace{} -\end_inset - -% intervals for sampling errors. 
- Four cases of the g-and-h distribution are considered for the error sets: - (a) normal ( -\begin_inset Formula $g=h=0$ -\end_inset - -); (b) heavy-tailed symmetric ( -\begin_inset Formula $g=0;\thinspace h=0.2$ -\end_inset - -); (c) light-tailed asymmetric ( -\begin_inset Formula $g=0.2;\thinspace h=0$ -\end_inset - -); (d) heavy-tailed asymmetric ( -\begin_inset Formula $g=h=0.2$ -\end_inset - -). - Additional cases with shifted distributions, -\begin_inset Formula $\mu$ -\end_inset - -= (-0.2,0.5) : (e) Normal ; (f) Student's- -\begin_inset Formula $t$ -\end_inset - - ( -\begin_inset Formula $\nu$ -\end_inset - -= 5). - All distributions have unit variance. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -These simulations confirm the full correlation transfer to the MSE, independentl -y of the underlying distribution. - The correlation coefficients for the other, non-linear, statistics are - mostly positive (within numerical uncertainty) and systematically smaller - than -\begin_inset Formula $|\rho|$ -\end_inset - -. - They are symmetrical with respect to -\begin_inset Formula $\rho=0$ -\end_inset - - for symmetrical error distributions. - The values for the MUE are consistently larger than, or equal to, the values - for -\begin_inset Formula $Q_{95}$ -\end_inset - -. - In all cases, the correlation coefficient for the MUE is very close to - -\begin_inset Formula $\rho^{2}$ -\end_inset - -. - For negative values of -\begin_inset Formula $\rho$ -\end_inset - -, the correlation coefficient of -\begin_inset Formula $Q_{95}$ -\end_inset - - is sensitive to the asymmetry of the errors distribution.
-\end_layout - -\begin_layout Standard -The same procedure has been applied to shifted means ( -\begin_inset Formula $\overline{e}_{1}=-0.2$ -\end_inset - -, -\begin_inset Formula $\overline{e}_{2}=0.5$ -\end_inset - -) for normal and Student's- -\begin_inset Formula $t$ -\end_inset - - distribution with 5 degrees of freedom (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:corrScore" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(e,f)). - For the normal distribution the symmetry observed above is broken, as well - as the pure quadratic trend for the MUE. - For the Student's- -\begin_inset Formula $t$ -\end_inset - - distribution, the correlations lie above a positive threshold and one can - have -\begin_inset Formula $\mathrm{cor}(s_{1},s_{2})>|\rho|$ -\end_inset - -. - -\end_layout - -\begin_layout Standard -Simulation of correlated error samples enabled us to illustrate properties - of correlation transfer to statistics: identical correlation for the MSE, - and smaller, mostly positive, correlations for the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - -. - As we covered only a limited set of scenarii, these features cannot be - considered as universal. - Indeed, the case studies in Section -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:Examples" -plural "false" -caps "false" -noprefix "false" - -\end_inset - - reveal some exceptions.
-\end_layout - -\begin_layout Section -Type I error probabilities for the comparison of MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - pairs -\begin_inset CommandInset label -LatexCommand label -name "sec:Type-I-error" - -\end_inset - - -\end_layout - -\begin_layout Standard -A false positive (type I error) is obtained when a true null hypothesis - is rejected by a test -\begin_inset CommandInset citation -LatexCommand citep -key "Gregory05a,Klauenberg2019" -literal "false" - -\end_inset - -. - Type I errors can be kept at a minimum by choosing appropriate data set - sizes. - Wilcox and Erceg-Hurn -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - - estimated the probability of type I errors for the comparison of quantiles - of correlated data sets with their method M (Algorithm -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:methodM" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) and determined the sample size -\begin_inset Formula $N$ -\end_inset - - required to reach a probability of type I errors -\begin_inset Formula $\hat{\alpha}$ -\end_inset - - close to the statistical testing threshold. - For their study, the authors used the g-and-h distribution (Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "sec:The-g-and-h-distribution" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) to generate the data samples, and compared quantiles up to 0.9 for two - levels of correlation, -\begin_inset Formula $\rho=0$ -\end_inset - - and -\begin_inset Formula $0.7$ -\end_inset - -.
- In these conditions, they concluded that -\begin_inset Formula $N\ge30$ -\end_inset - - was necessary to achieve a correct level of type I error, considering that - it should not exceed -\begin_inset Formula $0.075$ -\end_inset - - for a test at the 0.05 level -\begin_inset CommandInset citation -LatexCommand cite -key "Bradley1978" -literal "false" - -\end_inset - -. -\end_layout - -\begin_layout Standard -As these test cases did not include our conditions of interest in terms - of correlation (often above -\begin_inset Formula $\rho=0.9$ -\end_inset - -) and quantile level (0.95 for -\begin_inset Formula $Q_{95}$ -\end_inset - -), we performed new simulations, using the same procedure and functions - provided in -\family typewriter -R -\family default - packages -\family typewriter -WRS -\family default - -\begin_inset CommandInset citation -LatexCommand cite -key "R-WRS" -literal "false" - -\end_inset - - and -\family typewriter -WRS2 -\family default - -\begin_inset CommandInset citation -LatexCommand cite -key "R-WRS2" -literal "false" - -\end_inset - -. - After assessing the reproducibility of the original results, we kept the - same generative distribution and scenarii for -\begin_inset Formula $g$ -\end_inset - - and -\begin_inset Formula $h$ -\end_inset - - parameters, and we extended the exploration for dataset size from -\begin_inset Formula $N=20$ -\end_inset - - to 70, and correlation coefficient -\begin_inset Formula $\rho=0,\thinspace0.5,\thinspace0.9$ -\end_inset - -. - -\end_layout - -\begin_layout Standard -The procedure is the following: one draws two samples -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - of size -\begin_inset Formula $N$ -\end_inset - - from the same distribution and computes -\begin_inset Formula $p_{g}$ -\end_inset - - for the comparison of the values of a statistic S, -\begin_inset Formula $s_{1}$ -\end_inset - - and -\begin_inset Formula $s_{2}$ -\end_inset - -, respectively.
- A value of -\begin_inset Formula $p_{g}<0.05$ -\end_inset - - leads to the rejection of the true null hypothesis -\begin_inset Formula $s_{1}=s_{2}$ -\end_inset - -. - The process is repeated -\begin_inset Formula $M$ -\end_inset - - times, and the proportion of rejections provides an estimation of the probabili -ty -\begin_inset Formula $\alpha$ -\end_inset - - of type I errors. - For compatibility with the original study, the number of replications is - kept to -\begin_inset Formula $M=2000$ -\end_inset - -, and the number of bootstrap samples to -\begin_inset Formula $B=1000$ -\end_inset - -. - The results for the comparison of MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - pairs are reported in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:power1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/AppC_Alpha_MUE.png - lyxscale 25 - width 49text% - -\end_inset - - -\begin_inset Graphics - filename ../results/figs/AppC_Alpha_Q95.png - lyxscale 25 - width 49text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:power1" - -\end_inset - -Probability of type I errors -\begin_inset Formula $\alpha$ -\end_inset - - for the MUE (left) and -\begin_inset Formula $Q_{95}$ -\end_inset - - (right), as a function of dataset size -\begin_inset Formula $N$ -\end_inset - -. - Each graph corresponds to a type of g-and-h distribution for the data samples - (see text for details). - The points and lines correspond to a value of the datasets correlation - coefficient -\begin_inset Formula $\rho$ -\end_inset - -. 
- The black dashed line depicts the upper safety limit (0.075). - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -For the MUE, the safety region ( -\begin_inset Formula $\alpha\le0.075$ -\end_inset - -; black dashed line) -\begin_inset CommandInset citation -LatexCommand cite -key "Bradley1978" -literal "false" - -\end_inset - - is reached in all cases for -\begin_inset Formula $N\ge30$ -\end_inset - -. - Above -\begin_inset Formula $N=40$ -\end_inset - -, all values of -\begin_inset Formula $\alpha$ -\end_inset - - are close to the nominal value (0.05). - There is no remarkable trend with respect to the type of g-and-h distribution, - nor the correlation coefficient. - We have estimated previously -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - - that the MUE is typically located between the 0.5 and 0.75 quantiles, for - which Wilcox and Erceg-Hurn -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - - have concluded that the minimal dataset size is -\begin_inset Formula $N\ge30$ -\end_inset - -, which is confirmed here. -\end_layout - -\begin_layout Standard -For -\begin_inset Formula $Q_{95}$ -\end_inset - -, one sees that for -\begin_inset Formula $N=40$ -\end_inset - -, the situation is more favorable for the normal distribution, but in all - cases, the recommended limit is reached for -\begin_inset Formula $N\ge60$ -\end_inset - -. - Strong correlation coefficients ( -\begin_inset Formula $\rho=0.9$ -\end_inset - -) seem also to be almost systematically more favorable, and one observes - a slight deleterious effect below -\begin_inset Formula $N=50$ -\end_inset - - for heavy-tailed distributions ( -\begin_inset Formula $h=0.2$ -\end_inset - -).
- Nevertheless, even for -\begin_inset Formula $N=40$ -\end_inset - -, -\begin_inset Formula $\alpha$ -\end_inset - - does not exceed notably the 10 -\begin_inset space \thinspace{} -\end_inset - -% probability of type I error. -\end_layout - -\begin_layout Paragraph -Remark. -\end_layout - -\begin_layout Standard -Establishing the power of the test ( -\begin_inset Formula $1-\beta$ -\end_inset - -), where -\begin_inset Formula $\beta$ -\end_inset - - is the probability of type II errors (false negative, or the non-rejection - of a false null hypothesis) -\begin_inset CommandInset citation -LatexCommand citep -key "Gregory05a" -literal "false" - -\end_inset - - requires the definition of an alternative hypothesis -\begin_inset CommandInset citation -LatexCommand citep -key "Klauenberg2019" -literal "false" - -\end_inset - -. - In the present case, there is an infinity of ways to realize the -\begin_inset Formula $s_{1}\ne s_{2}$ -\end_inset - - alternative, so the power estimation is practically intractable. - -\end_layout - -\begin_layout Section -Numerical study of the Harrell and Davis algorithm -\begin_inset CommandInset label -LatexCommand label -name "sec:Simulated-example" - -\end_inset - - -\end_layout - -\begin_layout Standard -This example is intended to outline the advantages of the Harrell and Davis - (HD) algorithm for quantile estimation, notably when associated with bootstrap - sampling, as suggested by Wilcox and Erceg-Hurn -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - -.
- -\end_layout - -\begin_layout Standard -One considers the values -\begin_inset Formula $s_{1}$ -\end_inset - - and -\begin_inset Formula $s_{2}$ -\end_inset - - of a statistic -\begin_inset Formula $S$ -\end_inset - - for two datasets -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - -, which are drawn from a bivariate normal distribution -\begin_inset Formula -\begin{equation} -(E_{1},E_{2})\sim\mathcal{N}\left(\boldsymbol{\mu}=(\mu_{1},\mu_{2}),\boldsymbol{\Sigma}=\left(\begin{array}{cc} -\sigma_{1}^{2} & \rho\sigma_{1}\sigma_{2}\\ -\rho\sigma_{1}\sigma_{2} & \sigma_{2}^{2} -\end{array}\right)\right)\label{eq:bivnorm} -\end{equation} - -\end_inset - -where the error samples have different means -\begin_inset Formula $(\mu_{1},\mu_{2})$ -\end_inset - - and variances -\begin_inset Formula $(\sigma_{1}^{2},\sigma_{2}^{2})$ -\end_inset - -, and -\begin_inset Formula $\mathrm{cov}(E_{1},E_{2})=\rho\sigma_{1}\sigma_{2}$ -\end_inset - -. - The values of the parameters for the simulations and the corresponding - statistics are given in Table -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "tab:Exact-values" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - The values for the MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - are obtained as described in a previous article -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - -. -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Those values are fairly representative of some problems treated in the case - studies. 
- -\end_layout - -\end_inset - - -\begin_inset Float table -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\align center -\begin_inset Tabular - - - - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Set -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MSE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -RMSD -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -MUE -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $Q_{95}$ -\end_inset - - -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $E_{1}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.1 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.88 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2.16 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -\begin_inset Formula $E_{2}$ -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout - -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.1 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.0 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -0.80 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1.97 -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset 
CommandInset label -LatexCommand label -name "tab:Exact-values" - -\end_inset - -Reference values for the univariate statistics of datasets -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - described by Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:bivnorm" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, for -\begin_inset Formula $\mu_{1}=0$ -\end_inset - -, -\begin_inset Formula $\mu_{2}=0.1$ -\end_inset - -, -\begin_inset Formula $\sigma_{1}=1.1$ -\end_inset - - and -\begin_inset Formula $\sigma_{2}=1.0$ -\end_inset - -. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -Comparison of HD and -\begin_inset Formula $\hat{Q}_{7}$ -\end_inset - - quantiles -\begin_inset CommandInset label -LatexCommand label -name "subsec:Quantiles-estimation-by" - -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset Formula $Q_{95}$ -\end_inset - - is estimated by two algorithms: the HD algorithm and the -\begin_inset Formula $\hat{Q}_{7}$ -\end_inset - - method of Hyndman and Fan -\begin_inset CommandInset citation -LatexCommand cite -key "Hyndman1996" -literal "false" - -\end_inset - -, which is the default algorithms in the -\family typewriter -quantile() -\family default - function of -\family typewriter -R -\family default - -\begin_inset CommandInset citation -LatexCommand cite -key "CiteR" -literal "false" - -\end_inset - -. 
- -\begin_inset Formula $\hat{Q}_{7}$ -\end_inset - - is one of a family of quantile estimators based on the linear combination - of one or two order statistics -\begin_inset CommandInset citation -LatexCommand cite -key "Hyndman1996" -literal "false" - -\end_inset - -, whereas the HD algorithm is based on the linear combination of all order - statistics for a sample -\begin_inset CommandInset citation -LatexCommand cite -key "Harrell1982" -literal "false" - -\end_inset - -. - The latter is more efficient for small samples, but more computationally - demanding -\begin_inset CommandInset citation -LatexCommand cite -key "Harrell1982" -literal "false" - -\end_inset - -. - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout - It is not proposed in the base R quantile function options. - -\color orange -Advantage of HD? -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -In a first test, data sets of increasing sizes, between -\begin_inset Formula $N=20$ -\end_inset - - and 500, are generated by random sampling from the normal distribution - for -\begin_inset Formula $E_{2}$ -\end_inset - -, and -\begin_inset Formula $Q_{95}$ -\end_inset - - is estimated for each sample by both algorithms. - This procedure is repeated -\begin_inset Formula $10^{4}$ -\end_inset - - times, and the distributions of -\begin_inset Formula $Q_{95}$ -\end_inset - - values are summarized by a set of five quantiles (0.05, 0.25, 0.5, 0.75, 0.95). - The results are presented in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:simul1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a). - This simulation shows that the HD quantiles converge faster to the true - value (1.97) than the -\begin_inset Formula $\hat{Q}_{7}$ -\end_inset - - ones, with less bias for small samples ( -\begin_inset Formula $N<100$ -\end_inset - -). 
- -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/AppD_Compare_Q95.png - lyxscale 30 - width 65col% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:simul1" - -\end_inset - -Comparison of -\begin_inset Formula $Q_{95}$ -\end_inset - - estimation algorithms, -\begin_inset Formula $\hat{Q}_{7}$ -\end_inset - - and HD: (a) Monte Carlo sampling; (b) bootstrap sampling; (c) bootstrap - sample histogram for -\begin_inset Formula $N=100$ -\end_inset - -; (d) idem for -\begin_inset Formula $N=400$ -\end_inset - -. - The thicker bars in (a,b) represent 25-75 -\begin_inset space \thinspace{} -\end_inset - -% probability intervals and the finer bars 5-95 -\begin_inset space \thinspace{} -\end_inset - -% probability intervals. - The black dashed line represents the theoretical value for -\begin_inset Formula $Q_{95}$ -\end_inset - - (1.97). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -In a second test, a unique -\begin_inset Formula $E_{2}$ -\end_inset - - sample of size -\begin_inset Formula $N=500$ -\end_inset - - is generated, and subsets of increasing size are taken as initial data - for a bootstrap procedure ( -\begin_inset Formula $10^{4}$ -\end_inset - - repeats). - The bootstrap samples are analyzed as above and plotted in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:simul1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b). 
- The difference of convergence between both quantile algorithms is less - striking, but bootstrap for the -\begin_inset Formula $\hat{Q}_{7}$ -\end_inset - - algorithm seems to produce quite asymmetric distributions, where the median - is close to one of the quartiles. - If one looks at the histograms of sampled values for -\begin_inset Formula $N=100$ -\end_inset - - (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:simul1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(c)), one sees that the HD algorithms produces a much smoother bootstrap - sample histogram, where -\begin_inset Formula $\hat{Q}_{7}$ -\end_inset - - produces a ragged histograms. - The same features are still visible, to a lesser extent, for -\begin_inset Formula $N=400$ -\end_inset - - (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:simul1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(d)). - This property of the HD method explains its good performances for small - samples, when used in conjunction with the bootstrap -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - -. 
- -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Impact of dataset size on the comparison of statistics -\begin_inset CommandInset label -LatexCommand label -name "subsec:Impact-of-dataset" - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -For the comparison of the means, one can express directly the discrepancy - factor in terms of the datasets properties, -\begin_inset Formula $s_{1,2}=\mu_{1,2}$ -\end_inset - -, -\begin_inset Formula $u(s_{1,2})=\sigma_{1,2}/\sqrt{N}$ -\end_inset - -, -\begin_inset Formula $\mathrm{cov}(s_{1},s_{2})=\mathrm{cov}(E_{1},E_{2})=\rho\sigma_{1}\sigma_{2}$ -\end_inset - -, as -\begin_inset Formula -\begin{align} -\xi(s_{1},s_{2}) & =\frac{|\mu_{1}-\mu_{2}|}{\sqrt{\frac{1}{N}}\sqrt{\sigma_{1}^{2}+\sigma_{1}^{2}-2\sigma_{1}\sigma_{2}\rho}}\\ - & =\beta\sqrt{N} -\end{align} - -\end_inset - -The number of sample points necessary to reach the discrepancy threshold - -\begin_inset Formula $\kappa$ -\end_inset - - is -\begin_inset Formula -\begin{equation} -N_{t}=\left(\frac{\kappa}{\beta}\right)^{2} -\end{equation} - -\end_inset - -and the -\begin_inset Formula $p_{t}$ -\end_inset - - value can be expressed analytically -\begin_inset Formula -\begin{equation} -p_{t}=2*\left(1-\Phi(\beta\sqrt{N})\right) -\end{equation} - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -As a first illustration, let us consider independent samples ( -\begin_inset Formula $\rho=0$ -\end_inset - -). - In this case, the sample size necessary to discriminate the two means is - at least -\begin_inset Formula $N_{t}=855$ -\end_inset - - points. - This is a large value, because the difference in the mean values is much - smaller than the combined standard deviation of the samples. 
- If the correlation between the datasets is increased to -\begin_inset Formula $\rho=0.9$ -\end_inset - - – a value commonly observed in the real-life datasets – one gets a tenfold - smaller limit sample size, -\begin_inset Formula $N_{t}=89$ -\end_inset - - points. -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -Estimation of -\begin_inset Formula $p$ -\end_inset - --values -\begin_inset CommandInset label -LatexCommand label -name "subsec:Estimation-of--values" - -\end_inset - - -\end_layout - -\begin_layout Standard -The estimation of -\begin_inset Formula $p$ -\end_inset - --values is obtained by Monte Carlo sampling of -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - sets of size -\begin_inset Formula $N$ -\end_inset - - varying between 20 and 500 ( -\begin_inset Formula $\rho=0.9$ -\end_inset - -). - One first checks that the generalized -\begin_inset Formula $p$ -\end_inset - --value -\begin_inset Formula $p_{g}$ -\end_inset - - (Algorithm -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:methodM" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) is identical to the analytical value of -\begin_inset Formula $p_{t}$ -\end_inset - - for the comparison of mean values (Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:scoreBS" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a)). - -\end_layout - -\begin_layout Standard -Then, the interest of the Harrell-Davis algorithm for the estimation of - -\begin_inset Formula $p_{g}$ -\end_inset - - values for the comparison of quantiles is shown in Fig. 
-\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:scoreBS" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b): reaching the 0.05 threshold requires about 250 points for the HD method, - whereas the -\begin_inset Formula $\hat{Q}_{7}$ -\end_inset - - reference quantile algorithm requires about 380 points. - Besides, the HD curve is smoother than the reference one, due to the smoothness - properties of the HD estimator shown above. - -\begin_inset Float figure -placement !tb -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename ../results/figs/AppD_scoresBS.png - lyxscale 30 - width 65col% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:scoreBS" - -\end_inset - -Validation of methodological choices for -\begin_inset Formula $p$ -\end_inset - --value estimation: (a) generalized -\begin_inset Formula $p$ -\end_inset - --value -\begin_inset Formula $p_{g}$ -\end_inset - - for the comparison of means (MSE) compared to the analytical result -\begin_inset Formula $p_{t}$ -\end_inset - - ; (b) impact of the quantile estimation algorithms on -\begin_inset Formula $p_{g}$ -\end_inset - - for the comparison of -\begin_inset Formula $Q_{95}$ -\end_inset - - (see text). -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Section -The g-and-h distribution -\begin_inset CommandInset label -LatexCommand label -name "sec:The-g-and-h-distribution" - -\end_inset - - -\end_layout - -\begin_layout Standard -The g-and-h distribution -\begin_inset CommandInset citation -LatexCommand cite -key "Hoaglin1985" -literal "false" - -\end_inset - - is typically used to study the impact of distribution shapes on statistics. 
- If -\begin_inset Formula $z$ -\end_inset - - has a standard normal distribution, its transform -\begin_inset Formula -\begin{equation} -X=\begin{cases} -\frac{1}{g}(e^{gz}-1)e^{\frac{h}{2}z^{2}}, & \mathrm{if}\thinspace g>0\\ -ze^{\frac{h}{2}z^{2}} & \mathrm{if}\thinspace g=0 -\end{cases} -\end{equation} - -\end_inset - -has a g-and-h distribution. - Its shape is defined by parameters -\begin_inset Formula $g$ -\end_inset - - and -\begin_inset Formula $h$ -\end_inset - -, and contains the normal distribution as a special case ( -\begin_inset Formula $g=h=0$ -\end_inset - -). - Besides the normal, three typical cases are proposed by Wilcox and Erceg-Hurn - -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - -: heavy-tailed symmetric ( -\begin_inset Formula $g=0;\thinspace h=0.2$ -\end_inset - -), light-tailed asymmetric ( -\begin_inset Formula $g=0.2;\thinspace h=0$ -\end_inset - -), and heavy-tailed asymmetric ( -\begin_inset Formula $g=h=0.2$ -\end_inset - -). - -\end_layout - -\begin_layout Standard -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Statistical power of -\begin_inset Formula $p_{g}$ -\end_inset - - estimation by bootstrap -\end_layout - -\begin_layout Plain Layout -We now test the quality of -\begin_inset Formula $p_{g}$ -\end_inset - - estimated by the MC-BS method (Algorithm -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:MC-BS" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) using an hypothetical bivariate normal distribution for -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - (Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:bivnorm" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -). 
-\end_layout - -\begin_layout Plain Layout -This method is applied to the MSE, RMSD, MUE and -\begin_inset Formula $Q_{95}$ -\end_inset - - with -\begin_inset Formula $M=B=1000$ -\end_inset - - and -\begin_inset Formula $\rho=0.9$ -\end_inset - -. - The results are plotted in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:power" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -, where the distributions of -\begin_inset Formula $p_{g}$ -\end_inset - - values are summarized by five quantiles (0.05, 0.25, 0.5, 0.75, 0.95). - For the MSE, one sees that the median of the -\begin_inset Formula $p_{g}$ -\end_inset - - values is very close to the theoretical value ( -\emph on -cf. - -\emph default - Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:scoreBS" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), and the 5 percent threshold is reached for -\begin_inset Formula $N\simeq100$ -\end_inset - -. - However the distribution of -\begin_inset Formula $p_{g}$ -\end_inset - - values is such that it would take more than 300 points to reject with high - confidence (95 -\begin_inset space \thinspace{} -\end_inset - -%) the equality of the means. - The figures are about the same for the RMSD. - For the MUE, the median reaches the 5 percent threshold at -\begin_inset Formula $N\simeq130$ -\end_inset - -, but one would need more than 400 points to reject the equality hypothesis. - In this example, even with datasets of 500 points one has still 25 percent - chance to conclude wrongly that both -\begin_inset Formula $Q_{95}$ -\end_inset - - values are equal. 
-\end_layout - -\begin_layout Plain Layout -\begin_inset Float figure -placement t -wide false -sideways false -status open - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Graphics - filename /home/pernot/Bureau/Andreas/1-Paper2019Comp/results/figs/Simul_Power.png - lyxscale 30 - width 99text% - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\noindent -\align center -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "fig:power" - -\end_inset - -Power study of the -\begin_inset Formula $p_{g}$ -\end_inset - - value in the testing of the equality of statistics for two correlated normal - samples -\begin_inset Formula $E_{1}\sim\mathcal{N}(0.0,1.1)$ -\end_inset - -, -\begin_inset Formula $E_{2}\sim\mathcal{N}(0.1,1.0)$ -\end_inset - - and -\begin_inset Formula $\rho=\mathrm{cor}(E_{1},E_{2})=0.9$ -\end_inset - -, and for increasing sample size -\begin_inset Formula $N$ -\end_inset - -. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Following Wilcox -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - -, problems in the comparison of quantiles is largely due to the standard - estimators for quantiles -\begin_inset CommandInset citation -LatexCommand cite -key "Hyndman1996" -literal "false" - -\end_inset - -, which should be replaced by the Harrell-Davis estimator -\begin_inset CommandInset citation -LatexCommand cite -key "Harrell1982" -literal "false" - -\end_inset - -. - Wilcox proposes also to use a generalized -\begin_inset Formula $p$ -\end_inset - --value -\begin_inset CommandInset citation -LatexCommand cite -key "Liu1997" -literal "false" - -\end_inset - - for the comparison of statistics. 
- -\end_layout - -\begin_layout Plain Layout -To appraise the effect of this choice, three methods to estimate the -\begin_inset Formula $p$ -\end_inset - --value for the comparison of quantiles are compared: -\end_layout - -\begin_layout Itemize -Q95 : standard R quantile() function / -\begin_inset Formula $p_{t}$ -\end_inset - - -\end_layout - -\begin_layout Itemize -Q95_HD : Harrell & Davis (HD) quantile estimator -\begin_inset CommandInset citation -LatexCommand cite -key "Harrell1982" -literal "false" - -\end_inset - - / -\begin_inset Formula $p_{t}$ -\end_inset - - -\end_layout - -\begin_layout Itemize -Q95_M -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012" -literal "false" - -\end_inset - -: HD quantile estimator / -\begin_inset Formula $p_{g}$ -\end_inset - - -\begin_inset CommandInset citation -LatexCommand cite -key "Liu1997" -literal "false" - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -The results are reported in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:power" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. - First, to assess the impact of the Harrell-Davis method, a Monte Carlo - study was performed to generate sample of p-values from repeated sampling - of a bi-variate normal distribution above, with -\begin_inset Formula $\rho=0.9$ -\end_inset - -. - -\end_layout - -\begin_layout Plain Layout - -\color orange -Theoretical value of difference ? -\begin_inset Formula $s_{1}=1.96$ -\end_inset - -, -\begin_inset Formula $s_{2}=2.36$ -\end_inset - -, -\begin_inset Formula $|s_{1}-s_{2}|=0.4$ -\end_inset - - -\end_layout - -\begin_layout Plain Layout -As seen in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:power" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a), the HD method provides a marginal advantage, -\emph on -i.e. 
-\emph default -, the null hypothesis can be rejected for smaller samples. - The 0.05 threshold is reached for a sample size of about 300 for the HD - method, when the standard method requires a sample size of 400. - -\color orange -Compare to theoretical value. -\end_layout - -\begin_layout Plain Layout -When considering the real issue due to bootstrapping of quantiles, the advantage - of the HD method is more pronounced. - Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:power" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(b) shows the statistics for the -\begin_inset Formula $p$ -\end_inset - --values obtained by bootstrapping of a sample, with a Monte Carlo repetition - for a series of samples. - For each method and size, the 0.025, 0.25, 0.5, 0.75 and 0.975 percentiles of - the Monte Carlo samples are shown. - -\end_layout - -\begin_layout Plain Layout -A first point to note is that the median values follow curves which are - in good agreement withe the Monte Carlo ones in Fig. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "fig:power" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -(a). - However, the dispersion is quite large, and, even for sample sizes as large - as 1000, the bootstrap has a non-vanishing probability of wrong answers - ( -\emph on -i.e. -\emph default -, -\begin_inset Formula $p>0.05$ -\end_inset - -). - If one looks at the inter-quartile range, one sees that it is always smaller - for the HD method than for the standard one. - In this sense, the Q95_HD method has an improved statistical power over - the Q95 method, but it is only for sample sizes above 700 that the probability - of errors falls below 25 -\begin_inset space \thinspace{} -\end_inset - -%. 
- The impact of the generalized -\begin_inset Formula $p$ -\end_inset - --value (Q95_M method) is small on our test case (BETTER IQR !!!), but it - might become advantageous for non-normal distributions -\begin_inset CommandInset citation -LatexCommand cite -key "Liu1997" -literal "false" - -\end_inset - -. -\begin_inset Foot -status open - -\begin_layout Plain Layout - The Q95_M method is available in -\family typewriter -R -\family default - as the function -\family typewriter -Dqcomhd, -\family default - from package WSR2 -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2012,Wilcox2018,R-WRS2" -literal "false" - -\end_inset - -. -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout - -\series bold -\color orange -Check normality of stats-difference sets -\end_layout - -\begin_layout Plain Layout - -\color orange -Method M (Algorithm -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:methodM" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) gives a single -\begin_inset Formula $p_{g}$ -\end_inset - - value, without any estimate of its statistical power, -\emph on -i.e. - -\emph default - the . - -\end_layout - -\begin_layout Plain Layout - -\color orange -Unfortunately, the distributions underlying the error sets are unknown and - typically non-normal. - If one wants to be able to perform a Monte Carlo estimation of -\begin_inset Formula $P_{g}$ -\end_inset - -'s range of variation for a given pair of datasets, one can attempt to model - the underlying bivariate distribution, which might be a difficult problem, - notably for small samples. - A much easier option is to optimize a bivariate normal distribution with - similar statistics. 
- For instance, for comparison of MUEs, one could choose a pair of distributions - with MUEs similar to the reference ones, without caring for other statistics, - such as -\begin_inset Formula $RMSD$ -\end_inset - - or -\begin_inset Formula $Q_{95}$ -\end_inset - -. - One could then perform a MC-BS procedure to assess the distribution of - -\begin_inset Formula $p_{g}$ -\end_inset - -. -\end_layout - -\begin_layout Plain Layout -\begin_inset Float algorithm -wide false -sideways false -status open - -\begin_layout Plain Layout -Input: Two error sets -\begin_inset Formula $E_{1}$ -\end_inset - -, -\begin_inset Formula $E_{2}$ -\end_inset - - of size -\begin_inset Formula $N$ -\end_inset - -, and a statistic estimator -\begin_inset Formula $S$ -\end_inset - - -\end_layout - -\begin_layout Enumerate -Monte Carlo -\end_layout - -\begin_deeper -\begin_layout Enumerate -generate -\begin_inset Formula $M$ -\end_inset - - paired datasets -\begin_inset Formula $(E_{1}^{(i)},E_{2}^{(i)});i=1..M$ -\end_inset - -, by random sampling of their joint distribution (Eq. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:bivnorm-1" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) -\end_layout - -\end_deeper -\begin_layout Enumerate -Bootstrap -\end_layout - -\begin_deeper -\begin_layout Enumerate -for each dataset pair, a -\begin_inset Formula $p_{g}^{(i)}$ -\end_inset - - value is estimated using Method M (Algorithm -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:methodM" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) -\end_layout - -\end_deeper -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "alg:MC-BS" - -\end_inset - -MC-BS sampling of -\begin_inset Formula $p_{g}$ -\end_inset - - values. 
-\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Float algorithm -wide false -sideways false -status open - -\begin_layout Plain Layout -Input: two correlated error sets -\begin_inset Formula $E_{1}$ -\end_inset - -, -\begin_inset Formula $E_{2}$ -\end_inset - - of size -\begin_inset Formula $N$ -\end_inset - -, and a statistic estimator -\begin_inset Formula $S$ -\end_inset - - (MUE or -\begin_inset Formula $Q_{95}$ -\end_inset - -) -\end_layout - -\begin_layout Enumerate -For both error sets -\end_layout - -\begin_deeper -\begin_layout Enumerate -Estimate the MSE -\begin_inset Formula $m_{i}$ -\end_inset - -, -\begin_inset Formula $S$ -\end_inset - - value -\begin_inset Formula $s_{i}$ -\end_inset - - and their uncertainties -\begin_inset Formula $u(m_{i})$ -\end_inset - - and -\begin_inset Formula $u(s_{i})$ -\end_inset - - by bootstrap -\end_layout - -\begin_layout Enumerate -Optimize the parameters of a normal distribution -\begin_inset Formula $\mathcal{N}(\mu_{i},\sigma_{i})$ -\end_inset - - to minimize the weighted sum of squared residuals -\begin_inset Formula -\begin{equation} -\chi^{2}(\mu_{i},\sigma_{i})=\frac{\left(m_{i}-\mu_{i}\right)^{2}}{u^{2}(m_{i})}+\frac{\left(s_{i}-S_{f}(\mu_{i},\sigma_{i})\right)^{2}}{u^{2}(s_{i})} -\end{equation} - -\end_inset - -where -\begin_inset Formula $S_{f}(\mu,\sigma)$ -\end_inset - - is a function giving the adequate statistic (MUE or -\begin_inset Formula $Q_{95}$ -\end_inset - -) for a folded normal distribution -\begin_inset CommandInset citation -LatexCommand cite -key "Pernot2018" -literal "false" - -\end_inset - -. 
-\end_layout - -\end_deeper -\begin_layout Enumerate -Estimate the correlation coefficient -\begin_inset Formula $\rho$ -\end_inset - - between -\begin_inset Formula $E_{1}$ -\end_inset - - and -\begin_inset Formula $E_{2}$ -\end_inset - - -\end_layout - -\begin_layout Enumerate -Build a normal bivariate distribution with optimized parameters -\begin_inset Formula -\begin{equation} -(E_{1}^{*},E_{2}^{*})\sim\mathcal{N}\left(\boldsymbol{\mu}=(\mu_{1},\mu_{2}),\boldsymbol{\Sigma}=\left(\begin{array}{cc} -\sigma_{1}^{2} & \rho\sigma_{1}\sigma_{2}\\ -\rho\sigma_{1}\sigma_{2} & \sigma_{2}^{2} -\end{array}\right)\right)\label{eq:bivnorm-1} -\end{equation} - -\end_inset - - -\end_layout - -\begin_layout Enumerate -Perform a MC-BS analysis to get a set of -\begin_inset Formula $p_{g}$ -\end_inset - - values from this distribution (Algorithm -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:MC-BS" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -) -\end_layout - -\begin_layout Plain Layout -\begin_inset Caption Standard - -\begin_layout Plain Layout -\begin_inset CommandInset label -LatexCommand label -name "alg:pg-dist" - -\end_inset - -Sampling -\begin_inset Formula $p_{g}$ -\end_inset - - values from approximating normal distributions. - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Most of the pertinent methods tested in the present article come from the - bio-medical statistics (see, -\emph on -e.g., -\emph default - Ref. -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset citation -LatexCommand cite -key "Wilcox2018" -literal "false" - -\end_inset - -), where small samples are very common. - -\end_layout - -\begin_layout Plain Layout -Unless the reference dataset is exhaustive, error statistics are affected - by a finite size uncertainty. 
- When comparing uncertain statistics it is important to note that they might - not be independent -\begin_inset CommandInset citation -LatexCommand cite -key "Nicholls2016" -literal "false" - -\end_inset - -, which might strongly affect the comparison result. - For instance, the correlation of the means of two samples is equal to the - correlation of the samples, -\begin_inset Formula $\mathrm{cor}(\overline{x},\overline{y})=\mathrm{cor}(X,Y)$ -\end_inset - -. - When two samples are strongly positively correlated, which is often the - case in benchmark error datasets as shown below, small differences in their - means tend to be more significant than for independent samples. - This might have a non-negligible impact on the ranking of methods. -\end_layout - -\end_inset - - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -Comparison of means -\end_layout - -\begin_layout Plain Layout - -\color orange -Two-Sample t-Test for Equal Means (REF???) cf. - -\begin_inset Flex URL -status open - -\begin_layout Plain Layout - -https://www.itl.nist.gov/div898/handbook/eda/section4/eda43.htm##Snedecor -\end_layout - -\end_inset - - -\end_layout - -\end_inset - - -\begin_inset Note Note -status collapsed - -\begin_layout Plain Layout -\begin_inset Newpage clearpage -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset FloatList figure - -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset Newpage clearpage -\end_inset - - -\end_layout - -\begin_layout Plain Layout -\begin_inset FloatList table - -\end_inset - - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset CommandInset bibtex -LatexCommand bibtex -btprint "btPrintCited" -bibfiles "packages,NN" -options "bibtotoc,unsrturlPP" - -\end_inset - - -\end_layout - -\end_body -\end_document diff --git a/article/compress b/article/compress deleted file mode 100755 index db86e31..0000000 --- a/article/compress +++ /dev/null @@ -1,2 +0,0 @@ 
-gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -dPDFSETTINGS=/printer -dColorImageResolution=200 -sOutputFile=out.pdf $1 - diff --git a/article/NN.bib b/article_split/NN.bib similarity index 99% rename from article/NN.bib rename to article_split/NN.bib index e6dd29b..c18e7f6 100755 --- a/article/NN.bib +++ b/article_split/NN.bib @@ -2672,12 +2672,11 @@ @Article{Pernot2020a @Article{Pernot2020, author = {Pascal Pernot and Andreas Savin}, - journal = {n/a}, + journal = {arXiv:2003.00987}, title = {Probabilistic performance estimators for computational chemistry methods: Systematic Improvement Probability and Ranking Probability Matrix. {I. Theory}}, - year = {n/a}, - pages = {n/a}, - volume = {n/a}, + year = {2020}, owner = {pernot}, + url = {https://arxiv.org/abs/2003.00987}, timestamp = {2020.02.26}, } diff --git a/article_split/article_part1_v1.0.lyx b/article_split/article_part1_v1.0.lyx index e2e3a80..4a08db9 100644 --- a/article_split/article_part1_v1.0.lyx +++ b/article_split/article_part1_v1.0.lyx @@ -2375,11 +2375,137 @@ noprefix "false" -value, with the associated normality hypothesis. +\end_layout + +\begin_layout Subsubsection +Generalized +\begin_inset Formula $p$ +\end_inset + +-value +\end_layout + +\begin_layout Standard +The use of the generalized +\begin_inset Formula $p$ +\end_inset + +-value ( +\begin_inset Formula $p_{g}$ +\end_inset + +), as proposed by Wilcox and Erceg-Hurn +\begin_inset CommandInset citation +LatexCommand cite +key "Liu1997,Wilcox2012" +literal "false" + +\end_inset + + (method M; +\emph on +cf. + +\emph default + Algorithm +\begin_inset space \thinspace{} +\end_inset + + +\begin_inset CommandInset ref +LatexCommand ref +reference "alg:methodM" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +), conveniently avoids to estimate +\begin_inset Formula $u(s_{1}-s_{2})$ +\end_inset + +, and the incurring normality hypothesis of +\begin_inset Formula $p_{t}$ +\end_inset + +. 
+ It is based on a simple counting of null and negative bootstrapped differences + of statistics with paired samples. + If +\begin_inset Formula $S(E_{1})=S(E_{2})$ +\end_inset + +, one expects that the bootstrap sample will generate positive and negative + values of their difference in equal amounts. + In this case, +\begin_inset Formula $p^{*}\simeq1-p^{*}\simeq0.5$ +\end_inset + + and +\begin_inset Formula $p_{g}$ +\end_inset + + is close to 1. + Note that the null values in the differences sample are shared equally + between the positive and negative values. + Conversely, if there is a small proportion +\begin_inset Formula $p^{*}$ +\end_inset + + of negative values, the mean of the differences sample should be positive, + different from zero. + The smaller +\begin_inset Formula $p^{*}$ +\end_inset + +, the farther the mean from zero, and the lower the probability of the null, + +\begin_inset Formula $S(E_{1})=S(E_{2})$ +\end_inset + +, hypothesis. + The symmetric case occurs for large values of +\begin_inset Formula $p^{*}$ +\end_inset + + (small values of +\begin_inset Formula $1-p^{*}$ +\end_inset + +). + As the sign of the difference is irrelevant, a factor of two is applied to + estimate +\begin_inset Formula $p_{g}$ +\end_inset + +. + The identity of this algorithm with the analytical +\begin_inset Formula $p$ +\end_inset + +-value for the comparison of the means of normal samples is established + in Appendix +\begin_inset space \thinspace{} +\end_inset + + +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Estimation-of--values" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +.
+ \begin_inset Float algorithm placement !t wide false sideways false -status open +status collapsed \begin_layout Plain Layout Input: Two paired error sets @@ -2529,131 +2655,6 @@ literal "false" \end_layout -\begin_layout Subsubsection -Generalized -\begin_inset Formula $p$ -\end_inset - --value -\end_layout - -\begin_layout Standard -The use of the generalized -\begin_inset Formula $p$ -\end_inset - --value ( -\begin_inset Formula $p_{g}$ -\end_inset - -), as proposed by Wilcox and Erceg-Hurn -\begin_inset CommandInset citation -LatexCommand cite -key "Liu1997,Wilcox2012" -literal "false" - -\end_inset - - (method M; -\emph on -cf. - -\emph default - Algorithm -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "alg:methodM" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -), conveniently avoids to estimate -\begin_inset Formula $u(s_{1}-s_{2})$ -\end_inset - -, and the incurring normality hypothesis of -\begin_inset Formula $p_{t}$ -\end_inset - -. - It is based on a simple counting of null and negative bootstrapped differences - of statistics with paired samples. - If -\begin_inset Formula $S(E_{1})=S(E_{2})$ -\end_inset - -, one expects that the bootstrap sample will generate positive and negative - values of their difference in equal amounts. - In this case, -\begin_inset Formula $p^{*}\simeq1-p^{*}\simeq0.5$ -\end_inset - - and -\begin_inset Formula $p_{g}$ -\end_inset - - is close to 1. - Note that the null values in the differences sample are shared equally - between the positive and negative values. - On the opposite, if there is a small proportion -\begin_inset Formula $p^{*}$ -\end_inset - - of negative values, the mean of the differences sample should be positive, - different from zero. 
- The smaller -\begin_inset Formula $p^{*}$ -\end_inset - - the farther the mean from zero, and the lower the probability of the null, - -\begin_inset Formula $S(E_{1})=S(E_{2})$ -\end_inset - -, hypothesis. - The symmetric case occurs for large values of -\begin_inset Formula $p^{*}$ -\end_inset - - (small values of -\begin_inset Formula $1-p^{*}$ -\end_inset - -). - As the sign of the difference is irrelevant, a factor two is applied to - estimate -\begin_inset Formula $p_{g}$ -\end_inset - -. - The identity of this algorithm with the analytical -\begin_inset Formula $p$ -\end_inset - --value for the comparison of the means of normal samples is established - in Appendix -\begin_inset space \thinspace{} -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "subsec:Estimation-of--values" -plural "false" -caps "false" -noprefix "false" - -\end_inset - -. -\end_layout - \begin_layout Standard The use of paired samples is essential to capture inter-statistics correlations. Wilcox and Erceg-Hurn diff --git a/article/packages.bib b/article_split/packages.bib similarity index 100% rename from article/packages.bib rename to article_split/packages.bib diff --git a/article/unsrturlPP.bst b/article_split/unsrturlPP.bst similarity index 100% rename from article/unsrturlPP.bst rename to article_split/unsrturlPP.bst