diff --git a/R/fit.R b/R/fit.R index 7eda30f763..85e98f2b86 100644 --- a/R/fit.R +++ b/R/fit.R @@ -99,17 +99,19 @@ power.law.fit <- function(x, xmin = NULL, start = 2, force.continuous = FALSE, i #' be used to calculate confidence intervals and log-likelihood. See #' [stats4::mle-class()] for details. #' -#' If `implementation` is \sQuote{`plfit`}, then the result is a -#' named list with entries: \item{continuous}{Logical scalar, whether the +#' If `implementation` is \sQuote{`plfit`} or \sQuote{`plfit.p`}, then the result is a +#' named list with entries: +#' \item{continuous}{Logical scalar, whether the #' fitted power-law distribution was continuous or discrete.} -#' \item{alpha}{Numeric scalar, the exponent of the fitted power-law -#' distribution.} \item{xmin}{Numeric scalar, the minimum value from which the +#' \item{alpha}{Numeric scalar, the exponent of the fitted power-law distribution.} +#' \item{xmin}{Numeric scalar, the minimum value from which the #' power-law distribution was fitted. In other words, only the values larger -#' than `xmin` were used from the input vector.} \item{logLik}{Numeric -#' scalar, the log-likelihood of the fitted parameters.} \item{KS.stat}{Numeric -#' scalar, the test statistic of a Kolmogorov-Smirnov test that compares the -#' fitted distribution with the input vector. Smaller scores denote better -#' fit.} \item{KS.p}{Numeric scalar, the p-value of the Kolmogorov-Smirnov +#' than `xmin` were used from the input vector.} +#' \item{logLik}{Numeric scalar, the log-likelihood of the fitted parameters.} +#' \item{KS.stat}{Numeric scalar, the test statistic of a Kolmogorov-Smirnov test +#' that compares the fitted distribution with the input vector. +#' Smaller scores denote better fit.} +#' \item{KS.p}{Only for `plfit.p`. Numeric scalar, the p-value of the Kolmogorov-Smirnov #' test. Small p-values (less than 0.05) indicate that the test rejected the #' hypothesis that the original data could have been drawn from the fitted #' power-law distribution.} @@ -137,15 +139,25 @@ power.law.fit <- function(x, xmin = NULL, start = 2, force.continuous = FALSE, i #' fit1$logLik #' stats4::logLik(fit2) #' -fit_power_law <- function(x, xmin = NULL, start = 2, force.continuous = FALSE, - implementation = c("plfit", "R.mle"), ...) { +fit_power_law <- function( + x, + xmin = NULL, + start = 2, + force.continuous = FALSE, + implementation = c("plfit", "R.mle", "plfit.p"), + ...) { implementation <- igraph.match.arg(implementation) if (implementation == "r.mle") { power.law.fit.old(x, xmin, start, ...) - } else if (implementation == "plfit") { + } else if (implementation %in% c("plfit", "plfit.p")) { if (is.null(xmin)) xmin <- -1 - power.law.fit.new(x, xmin = xmin, force.continuous = force.continuous) + power.law.fit.new( + x, + xmin = xmin, + force.continuous = force.continuous, + p.value = (implementation == "plfit.p") + ) } } @@ -186,7 +198,7 @@ power.law.fit.old <- function(x, xmin = NULL, start = 2, ...) { alpha } -power.law.fit.new <- function(data, xmin = -1, force.continuous = FALSE) { +power.law.fit.new <- function(data, xmin = -1, force.continuous = FALSE, p.value = FALSE) { # Argument checks data <- as.numeric(data) xmin <- as.numeric(xmin) @@ -194,7 +206,7 @@ power.law.fit.new <- function(data, xmin = -1, force.continuous = FALSE) { on.exit(.Call(R_igraph_finalizer)) # Function call - res <- .Call(R_igraph_power_law_fit, data, xmin, force.continuous) + res <- .Call(R_igraph_power_law_fit_new, data, xmin, force.continuous, p.value) res } diff --git a/man/fit_power_law.Rd b/man/fit_power_law.Rd index ed1db4d22b..e4d4cb4f3d 100644 --- a/man/fit_power_law.Rd +++ b/man/fit_power_law.Rd @@ -9,7 +9,7 @@ fit_power_law( xmin = NULL, start = 2, force.continuous = FALSE, - implementation = c("plfit", "R.mle"), + implementation = c("plfit", "R.mle", "plfit.p"), ... ) } @@ -49,17 +49,19 @@ Depends on the \code{implementation} argument. If it is be used to calculate confidence intervals and log-likelihood. See \code{\link[stats4:mle-class]{stats4::mle-class()}} for details. -If \code{implementation} is \sQuote{\code{plfit}}, then the result is a -named list with entries: \item{continuous}{Logical scalar, whether the +If \code{implementation} is \sQuote{\code{plfit}} or \sQuote{\code{plfit.p}}, then the result is a +named list with entries: +\item{continuous}{Logical scalar, whether the fitted power-law distribution was continuous or discrete.} -\item{alpha}{Numeric scalar, the exponent of the fitted power-law -distribution.} \item{xmin}{Numeric scalar, the minimum value from which the +\item{alpha}{Numeric scalar, the exponent of the fitted power-law distribution.} +\item{xmin}{Numeric scalar, the minimum value from which the power-law distribution was fitted. In other words, only the values larger -than \code{xmin} were used from the input vector.} \item{logLik}{Numeric -scalar, the log-likelihood of the fitted parameters.} \item{KS.stat}{Numeric -scalar, the test statistic of a Kolmogorov-Smirnov test that compares the -fitted distribution with the input vector. Smaller scores denote better -fit.} \item{KS.p}{Numeric scalar, the p-value of the Kolmogorov-Smirnov +than \code{xmin} were used from the input vector.} +\item{logLik}{Numeric scalar, the log-likelihood of the fitted parameters.} +\item{KS.stat}{Numeric scalar, the test statistic of a Kolmogorov-Smirnov test +that compares the fitted distribution with the input vector. +Smaller scores denote better fit.} +\item{KS.p}{Only for \code{plfit.p}. Numeric scalar, the p-value of the Kolmogorov-Smirnov test. Small p-values (less than 0.05) indicate that the test rejected the hypothesis that the original data could have been drawn from the fitted power-law distribution.} diff --git a/src/cpp11.cpp b/src/cpp11.cpp index e142324b89..604ff2b64d 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -361,7 +361,7 @@ extern SEXP R_igraph_path_length_hist(SEXP, SEXP); extern SEXP R_igraph_permute_vertices(SEXP, SEXP); extern SEXP R_igraph_personalized_pagerank(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP R_igraph_personalized_pagerank_vs(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP R_igraph_power_law_fit(SEXP, SEXP, SEXP); +extern SEXP R_igraph_power_law_fit_new(SEXP, SEXP, SEXP, SEXP); extern SEXP R_igraph_preference_game(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP R_igraph_pseudo_diameter(SEXP, SEXP, SEXP, SEXP); extern SEXP R_igraph_pseudo_diameter_dijkstra(SEXP, SEXP, SEXP, SEXP, SEXP); @@ -824,7 +824,7 @@ static const R_CallMethodDef CallEntries[] = { {"R_igraph_permute_vertices", (DL_FUNC) &R_igraph_permute_vertices, 2}, {"R_igraph_personalized_pagerank", (DL_FUNC) &R_igraph_personalized_pagerank, 8}, {"R_igraph_personalized_pagerank_vs", (DL_FUNC) &R_igraph_personalized_pagerank_vs, 8}, - {"R_igraph_power_law_fit", (DL_FUNC) &R_igraph_power_law_fit, 3}, + {"R_igraph_power_law_fit_new", (DL_FUNC) &R_igraph_power_law_fit_new, 4}, {"R_igraph_preference_game", (DL_FUNC) &R_igraph_preference_game, 7}, {"R_igraph_pseudo_diameter", (DL_FUNC) &R_igraph_pseudo_diameter, 4}, {"R_igraph_pseudo_diameter_dijkstra", (DL_FUNC) &R_igraph_pseudo_diameter_dijkstra, 5}, diff --git a/src/rinterface_extra.c b/src/rinterface_extra.c index 5180ca2835..5c975b1541 100644 --- a/src/rinterface_extra.c +++ b/src/rinterface_extra.c @@ -8331,6 +8331,60 @@ SEXP R_igraph_incident_edges(SEXP pgraph, SEXP pe, SEXP pmode) { return result; } +SEXP R_igraph_power_law_fit_new(SEXP data, SEXP xmin, SEXP force_continuous, SEXP compute_pvalue) +{ + igraph_vector_t c_data; + igraph_plfit_result_t c_res; + igraph_real_t c_xmin; + igraph_bool_t c_force_continuous, c_compute_pvalue; + SEXP result, names; + + SEXP r_result; + + R_SEXP_to_vector(data, &c_data); + IGRAPH_R_CHECK_REAL(xmin); + c_xmin = REAL(xmin)[0]; + IGRAPH_R_CHECK_BOOL(force_continuous); + c_force_continuous = LOGICAL(force_continuous)[0]; + IGRAPH_R_CHECK_BOOL(compute_pvalue); + c_compute_pvalue = LOGICAL(compute_pvalue)[0]; + + // Can't use the generated `R_igraph_power_law_fit()` because we need `c_res` to compute the p-value + IGRAPH_R_CHECK(igraph_power_law_fit(&c_data, &c_res, c_xmin, c_force_continuous)); + + if (c_compute_pvalue) { + igraph_real_t p; + igraph_plfit_result_calculate_p_value(&c_res, &p, 0.001); + + PROTECT(result=NEW_LIST(6)); + PROTECT(names=NEW_CHARACTER(6)); + + SET_VECTOR_ELT(result, 5, Rf_ScalarReal(p)); + SET_STRING_ELT(names, 5, Rf_mkChar("KS.p")); + } else { + PROTECT(result=NEW_LIST(5)); + PROTECT(names=NEW_CHARACTER(5)); + } + + SET_VECTOR_ELT(result, 0, Rf_ScalarLogical(c_res.continuous)); + SET_VECTOR_ELT(result, 1, Rf_ScalarReal(c_res.alpha)); + SET_VECTOR_ELT(result, 2, Rf_ScalarReal(c_res.xmin)); + SET_VECTOR_ELT(result, 3, Rf_ScalarReal(c_res.L)); + SET_VECTOR_ELT(result, 4, Rf_ScalarReal(c_res.D)); + + SET_STRING_ELT(names, 0, Rf_mkChar("continuous")); + SET_STRING_ELT(names, 1, Rf_mkChar("alpha")); + SET_STRING_ELT(names, 2, Rf_mkChar("xmin")); + SET_STRING_ELT(names, 3, Rf_mkChar("logLik")); + SET_STRING_ELT(names, 4, Rf_mkChar("KS.stat")); + SET_NAMES(result, names); + + r_result = result; + + UNPROTECT(2); + return(r_result); +} + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C */ /* C */ /* Given a HIERARCHIC CLUSTERING, described as a sequence of C */