From a625da04002d30e9619f40c1d1b6aeb56864d6dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 1 Jun 2024 19:20:11 +0200 Subject: [PATCH 1/6] feat: Support `fit_power_law(implementation = "plfit.p")` to compute the P-value --- R/fit.R | 38 +++++++++++++++++++++++++------------- man/fit_power_law.Rd | 22 ++++++++++++---------- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/R/fit.R b/R/fit.R index 7eda30f763..9487092cbd 100644 --- a/R/fit.R +++ b/R/fit.R @@ -99,17 +99,19 @@ power.law.fit <- function(x, xmin = NULL, start = 2, force.continuous = FALSE, i #' be used to calculate confidence intervals and log-likelihood. See #' [stats4::mle-class()] for details. #' -#' If `implementation` is \sQuote{`plfit`}, then the result is a -#' named list with entries: \item{continuous}{Logical scalar, whether the +#' If `implementation` is \sQuote{`plfit`} or \sQuote{`plfit.p`}, then the result is a +#' named list with entries: +#' \item{continuous}{Logical scalar, whether the #' fitted power-law distribution was continuous or discrete.} -#' \item{alpha}{Numeric scalar, the exponent of the fitted power-law -#' distribution.} \item{xmin}{Numeric scalar, the minimum value from which the +#' \item{alpha}{Numeric scalar, the exponent of the fitted power-law distribution.} +#' \item{xmin}{Numeric scalar, the minimum value from which the #' power-law distribution was fitted. In other words, only the values larger -#' than `xmin` were used from the input vector.} \item{logLik}{Numeric -#' scalar, the log-likelihood of the fitted parameters.} \item{KS.stat}{Numeric -#' scalar, the test statistic of a Kolmogorov-Smirnov test that compares the -#' fitted distribution with the input vector. Smaller scores denote better -#' fit.} \item{KS.p}{Numeric scalar, the p-value of the Kolmogorov-Smirnov +#' than `xmin` were used from the input vector.} +#' \item{logLik}{Numeric scalar, the log-likelihood of the fitted parameters.} +#' \item{KS.stat}{Numeric scalar, the test statistic of a Kolmogorov-Smirnov test +#' that compares the fitted distribution with the input vector. +#' Smaller scores denote better fit.} +#' \item{KS.p}{Only for `plfit.p`. Numeric scalar, the p-value of the Kolmogorov-Smirnov #' test. Small p-values (less than 0.05) indicate that the test rejected the #' hypothesis that the original data could have been drawn from the fitted #' power-law distribution.} @@ -137,15 +139,25 @@ power.law.fit <- function(x, xmin = NULL, start = 2, force.continuous = FALSE, i #' fit1$logLik #' stats4::logLik(fit2) #' -fit_power_law <- function(x, xmin = NULL, start = 2, force.continuous = FALSE, - implementation = c("plfit", "R.mle"), ...) { +fit_power_law <- function( + x, + xmin = NULL, + start = 2, + force.continuous = FALSE, + implementation = c("plfit", "R.mle", "plfit.p"), + ...) { implementation <- igraph.match.arg(implementation) if (implementation == "r.mle") { power.law.fit.old(x, xmin, start, ...) - } else if (implementation == "plfit") { + } else if (implementation %in% c("plfit", "plfit.p")) { if (is.null(xmin)) xmin <- -1 - power.law.fit.new(x, xmin = xmin, force.continuous = force.continuous) + power.law.fit.new( + x, + xmin = xmin, + force.continuous = force.continuous, + p.value = (implementation == "plfit.p") + ) } } diff --git a/man/fit_power_law.Rd b/man/fit_power_law.Rd index ed1db4d22b..e4d4cb4f3d 100644 --- a/man/fit_power_law.Rd +++ b/man/fit_power_law.Rd @@ -9,7 +9,7 @@ fit_power_law( xmin = NULL, start = 2, force.continuous = FALSE, - implementation = c("plfit", "R.mle"), + implementation = c("plfit", "R.mle", "plfit.p"), ... ) } @@ -49,17 +49,19 @@ Depends on the \code{implementation} argument. If it is be used to calculate confidence intervals and log-likelihood. See \code{\link[stats4:mle-class]{stats4::mle-class()}} for details. -If \code{implementation} is \sQuote{\code{plfit}}, then the result is a -named list with entries: \item{continuous}{Logical scalar, whether the +If \code{implementation} is \sQuote{\code{plfit}} or \sQuote{\code{plfit.p}}, then the result is a +named list with entries: +\item{continuous}{Logical scalar, whether the fitted power-law distribution was continuous or discrete.} -\item{alpha}{Numeric scalar, the exponent of the fitted power-law -distribution.} \item{xmin}{Numeric scalar, the minimum value from which the +\item{alpha}{Numeric scalar, the exponent of the fitted power-law distribution.} +\item{xmin}{Numeric scalar, the minimum value from which the power-law distribution was fitted. In other words, only the values larger -than \code{xmin} were used from the input vector.} \item{logLik}{Numeric -scalar, the log-likelihood of the fitted parameters.} \item{KS.stat}{Numeric -scalar, the test statistic of a Kolmogorov-Smirnov test that compares the -fitted distribution with the input vector. Smaller scores denote better -fit.} \item{KS.p}{Numeric scalar, the p-value of the Kolmogorov-Smirnov +than \code{xmin} were used from the input vector.} +\item{logLik}{Numeric scalar, the log-likelihood of the fitted parameters.} +\item{KS.stat}{Numeric scalar, the test statistic of a Kolmogorov-Smirnov test +that compares the fitted distribution with the input vector. +Smaller scores denote better fit.} +\item{KS.p}{Only for \code{plfit.p}. Numeric scalar, the p-value of the Kolmogorov-Smirnov test. Small p-values (less than 0.05) indicate that the test rejected the hypothesis that the original data could have been drawn from the fitted power-law distribution.} From e7adf0269b15e48ed07f21ecd1bcf39167d97517 Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Wed, 10 Jul 2024 12:10:13 +0200 Subject: [PATCH 2/6] add `R_igraph_power_law_fit_new` with p-value --- R/fit.R | 4 ++-- src/cpp11.cpp | 4 ++-- src/rinterface_extra.c | 53 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/R/fit.R b/R/fit.R index 9487092cbd..85e98f2b86 100644 --- a/R/fit.R +++ b/R/fit.R @@ -198,7 +198,7 @@ power.law.fit.old <- function(x, xmin = NULL, start = 2, ...) { alpha } -power.law.fit.new <- function(data, xmin = -1, force.continuous = FALSE) { +power.law.fit.new <- function(data, xmin = -1, force.continuous = FALSE, p.value = FALSE) { # Argument checks data <- as.numeric(data) xmin <- as.numeric(xmin) @@ -206,7 +206,7 @@ power.law.fit.new <- function(data, xmin = -1, force.continuous = FALSE) { on.exit(.Call(R_igraph_finalizer)) # Function call - res <- .Call(R_igraph_power_law_fit, data, xmin, force.continuous) + res <- .Call(R_igraph_power_law_fit_new, data, xmin, force.continuous, p.value) res } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index e142324b89..604ff2b64d 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -361,7 +361,7 @@ extern SEXP R_igraph_path_length_hist(SEXP, SEXP); extern SEXP R_igraph_permute_vertices(SEXP, SEXP); extern SEXP R_igraph_personalized_pagerank(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP R_igraph_personalized_pagerank_vs(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP R_igraph_power_law_fit(SEXP, SEXP, SEXP); +extern SEXP R_igraph_power_law_fit_new(SEXP, SEXP, SEXP, SEXP); extern SEXP R_igraph_preference_game(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP R_igraph_pseudo_diameter(SEXP, SEXP, SEXP, SEXP); extern SEXP R_igraph_pseudo_diameter_dijkstra(SEXP, SEXP, SEXP, SEXP, SEXP); @@ -824,7 +824,7 @@ static const R_CallMethodDef CallEntries[] = { {"R_igraph_permute_vertices", (DL_FUNC) &R_igraph_permute_vertices, 2}, {"R_igraph_personalized_pagerank", (DL_FUNC) &R_igraph_personalized_pagerank, 8}, {"R_igraph_personalized_pagerank_vs", (DL_FUNC) &R_igraph_personalized_pagerank_vs, 8}, - {"R_igraph_power_law_fit", (DL_FUNC) &R_igraph_power_law_fit, 3}, + {"R_igraph_power_law_fit_new", (DL_FUNC) &R_igraph_power_law_fit_new, 4}, {"R_igraph_preference_game", (DL_FUNC) &R_igraph_preference_game, 7}, {"R_igraph_pseudo_diameter", (DL_FUNC) &R_igraph_pseudo_diameter, 4}, {"R_igraph_pseudo_diameter_dijkstra", (DL_FUNC) &R_igraph_pseudo_diameter_dijkstra, 5}, diff --git a/src/rinterface_extra.c b/src/rinterface_extra.c index 5180ca2835..b3ac25a52f 100644 --- a/src/rinterface_extra.c +++ b/src/rinterface_extra.c @@ -8331,6 +8331,59 @@ SEXP R_igraph_incident_edges(SEXP pgraph, SEXP pe, SEXP pmode) { return result; } +SEXP R_igraph_power_law_fit_new(SEXP data, SEXP xmin, SEXP force_continuous, SEXP pvalue) +{ + igraph_vector_t c_data; + igraph_plfit_result_t c_res; + igraph_real_t c_xmin; + igraph_bool_t c_force_continuous, c_compute_pvalue; + SEXP result, names; + + SEXP r_result; + + R_SEXP_to_vector(data, &c_data); + IGRAPH_R_CHECK_REAL(xmin); + c_xmin = REAL(xmin)[0]; + IGRAPH_R_CHECK_BOOL(force_continuous); + c_force_continuous = LOGICAL(force_continuous)[0]; + IGRAPH_R_CHECK_BOOL(pvalue); + c_compute_pvalue = LOGICAL(pvalue)[0]; + + IGRAPH_R_CHECK(igraph_power_law_fit(&c_data, &c_res, c_xmin, c_force_continuous)); + + if (c_compute_pvalue) { + igraph_real_t p; + igraph_plfit_result_calculate_p_value(&c_res, &p, 0.001); + + PROTECT(result=NEW_LIST(6)); + PROTECT(names=NEW_CHARACTER(6)); + + SET_VECTOR_ELT(result, 5, Rf_ScalarReal(p)); + SET_STRING_ELT(names, 5, Rf_mkChar("KS.p")); + } else { + PROTECT(result=NEW_LIST(5)); + PROTECT(names=NEW_CHARACTER(5)); + } + + SET_VECTOR_ELT(result, 0, Rf_ScalarLogical(c_res.continuous)); + SET_VECTOR_ELT(result, 1, Rf_ScalarReal(c_res.alpha)); + SET_VECTOR_ELT(result, 2, Rf_ScalarReal(c_res.xmin)); + SET_VECTOR_ELT(result, 3, Rf_ScalarReal(c_res.L)); + SET_VECTOR_ELT(result, 4, Rf_ScalarReal(c_res.D)); + + SET_STRING_ELT(names, 0, Rf_mkChar("continuous")); + SET_STRING_ELT(names, 1, Rf_mkChar("alpha")); + SET_STRING_ELT(names, 2, Rf_mkChar("xmin")); + SET_STRING_ELT(names, 3, Rf_mkChar("logLik")); + SET_STRING_ELT(names, 4, Rf_mkChar("KS.stat")); + SET_NAMES(result, names); + + r_result = result; + + UNPROTECT(2); + return(r_result); +} + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C */ /* C */ /* Given a HIERARCHIC CLUSTERING, described as a sequence of C */ From 706cb35f8831f59c6a374848e681d162fa211e0f Mon Sep 17 00:00:00 2001 From: Michael Antonov Date: Mon, 22 Jul 2024 12:33:19 +0200 Subject: [PATCH 3/6] Update src/rinterface_extra.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kirill Müller --- src/rinterface_extra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rinterface_extra.c b/src/rinterface_extra.c index b3ac25a52f..33ca705acc 100644 --- a/src/rinterface_extra.c +++ b/src/rinterface_extra.c @@ -8331,7 +8331,7 @@ SEXP R_igraph_incident_edges(SEXP pgraph, SEXP pe, SEXP pmode) { return result; } -SEXP R_igraph_power_law_fit_new(SEXP data, SEXP xmin, SEXP force_continuous, SEXP pvalue) +SEXP R_igraph_power_law_fit_new(SEXP data, SEXP xmin, SEXP force_continuous, SEXP compute_pvalue) { igraph_vector_t c_data; igraph_plfit_result_t c_res; From 29da9791db4452ee795b592f1c262a0786a3430a Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Mon, 22 Jul 2024 12:37:47 +0200 Subject: [PATCH 4/6] fix compilation --- src/rinterface_extra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rinterface_extra.c b/src/rinterface_extra.c index 33ca705acc..ce61361a5e 100644 --- a/src/rinterface_extra.c +++ b/src/rinterface_extra.c @@ -8346,8 +8346,8 @@ SEXP R_igraph_power_law_fit_new(SEXP data, SEXP xmin, SEXP force_continuous, SEX c_xmin = REAL(xmin)[0]; IGRAPH_R_CHECK_BOOL(force_continuous); c_force_continuous = LOGICAL(force_continuous)[0]; - IGRAPH_R_CHECK_BOOL(pvalue); - c_compute_pvalue = LOGICAL(pvalue)[0]; + IGRAPH_R_CHECK_BOOL(compute_pvalue); + c_compute_pvalue = LOGICAL(compute_pvalue)[0]; IGRAPH_R_CHECK(igraph_power_law_fit(&c_data, &c_res, c_xmin, c_force_continuous)); From 79167fa07bbdfe732c118c770e3819d81dd7a587 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 22 Aug 2024 11:03:35 +0200 Subject: [PATCH 5/6] Add comment --- src/rinterface_extra.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rinterface_extra.c b/src/rinterface_extra.c index ce61361a5e..00e42a705b 100644 --- a/src/rinterface_extra.c +++ b/src/rinterface_extra.c @@ -8349,6 +8349,7 @@ SEXP R_igraph_power_law_fit_new(SEXP data, SEXP xmin, SEXP force_continuous, SEX IGRAPH_R_CHECK_BOOL(compute_pvalue); c_compute_pvalue = LOGICAL(compute_pvalue)[0]; + # Can't use the generated `R_igraph_power_law_fit()` because we need `c_res` to compute the p-value IGRAPH_R_CHECK(igraph_power_law_fit(&c_data, &c_res, c_xmin, c_force_continuous)); if (c_compute_pvalue) { From dc277f636cb5baed848e4f914332448ae5f98509 Mon Sep 17 00:00:00 2001 From: Michael Antonov Date: Thu, 22 Aug 2024 11:09:05 +0200 Subject: [PATCH 6/6] Fix comment --- src/rinterface_extra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rinterface_extra.c b/src/rinterface_extra.c index 00e42a705b..5c975b1541 100644 --- a/src/rinterface_extra.c +++ b/src/rinterface_extra.c @@ -8349,7 +8349,7 @@ SEXP R_igraph_power_law_fit_new(SEXP data, SEXP xmin, SEXP force_continuous, SEX IGRAPH_R_CHECK_BOOL(compute_pvalue); c_compute_pvalue = LOGICAL(compute_pvalue)[0]; - # Can't use the generated `R_igraph_power_law_fit()` because we need `c_res` to compute the p-value + // Can't use the generated `R_igraph_power_law_fit()` because we need `c_res` to compute the p-value IGRAPH_R_CHECK(igraph_power_law_fit(&c_data, &c_res, c_xmin, c_force_continuous)); if (c_compute_pvalue) {