Merge pull request #2 from EricMarcon/JSPE

JSPE
EricMarcon · Dec 11, 2023 · 93c097c · 93c097c
2 parents a814b98 + e580268
commit 93c097c
Show file tree

Hide file tree

Showing 22 changed files with 1,539 additions and 565 deletions.
diff --git a/.#Extra/Local CI/Mhat.R b/.#Extra/Local CI/Mhat.R
@@ -0,0 +1,194 @@
+Mhat <-
+function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, 
+         CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL,
+         Quantiles = FALSE, NumberOfSimulations = 100, Alpha = 0.05, 
+         verbose = interactive(), CheckArguments = TRUE)
+{
+  # Eliminate erroneous configurations
+  if (CheckArguments) {
+    CheckdbmssArguments()
+    if (CaseControl & (ReferenceType==NeighborType)) {
+      warning("Cases and controls are identical.")
+      return(rep(1,length(r)))
+    }
+    if (Quantiles & !Individual)
+      stop("Quantiles can't be TRUE if Individual is FALSE.")
+  }
+
+  # Default r values: 64 values up to half the max distance
+  if (is.null(r)) {
+    if (inherits(X, "Dtable")) {
+      # Dtable case
+      rMax <- max(X$Dmatrix)
+    } else {
+      # wmppp case
+      rMax <- diameter(X$window)
+    }
+    r <- rMax*c(0, 1:20, seq(22, 40, 2), seq(45, 100,5), seq(110, 200, 10), seq(220, 400, 20))/800
+  }
+
+  # Vectors to recognize point types
+  IsReferenceType <- X$marks$PointType==ReferenceType
+  IsNeighborType <- X$marks$PointType==NeighborType
+
+  if (!is.null(ReferencePoint)) {
+    # Set individual to TRUE if a refernce point is given
+    Individual <- TRUE
+    if (IsReferenceType[ReferencePoint]) {
+      # Remember the name of the reference point in the dataset of reference type
+      ReferencePoint_name <- row.names(X$marks[ReferencePoint, ])
+    } else {
+      # The reference point must be in the reference type
+      stop("The reference point must be of the reference point type.")
+    }
+  }
+
+  # Global ratio
+  if (ReferenceType==NeighborType | CaseControl) {
+    WrMinusReferencePoint <- sum(X$marks$PointWeight[IsReferenceType])-X$marks$PointWeight
+    Wn <- WrMinusReferencePoint[IsReferenceType]
+  } else {
+    Wn <- sum(X$marks$PointWeight[IsNeighborType])
+  }
+  if (CaseControl) {
+    Wa <- sum(X$marks$PointWeight[IsNeighborType]) 
+  } else {
+    WaMinusReferencePoint <- sum(X$marks$PointWeight)-X$marks$PointWeight
+    Wa <- WaMinusReferencePoint[IsReferenceType]
+  }
+  GlobalRatio <- Wn/Wa
+
+  Nr <- length(r)
+  # Neighborhoods (i.e. all neighbors of a point less than a distance apart)
+  # Store weights of neighbors of interest in first Nr columns, all points from Nr+1 to 2*Nr
+
+  # Call C routine to fill Nbd
+  if (CaseControl) {
+    if (inherits(X, "Dtable")) {
+      # Dtable case
+      Nbd <- parallelCountNbdDtCC(r, X$Dmatrix, X$marks$PointWeight, IsReferenceType, IsNeighborType)
+    } else {
+      # wmppp case
+      Nbd <- parallelCountNbdCC(r, X$x, X$y, X$marks$PointWeight, IsReferenceType, IsNeighborType)
+    }
+  } else {
+    if (inherits(X, "Dtable")) {
+      # Dtable case
+      Nbd <- parallelCountNbdDt(r, X$Dmatrix, X$marks$PointWeight, IsReferenceType, IsNeighborType)
+    } else {
+      # wmppp case
+      Nbd <- parallelCountNbd(r, X$x, X$y, X$marks$PointWeight, IsReferenceType, IsNeighborType)
+    }
+  }
+
+  # Cumulate weights up to each distance
+  NbdInt <- t(apply(Nbd[, seq_len(Nr)], 1, cumsum))
+  NbdAll <- t(apply(Nbd[, (Nr + 1):(2 * Nr)], 1, cumsum))
+
+  # Calculate the ratio of points of interest around each point
+  LocalRatio <- NbdInt/NbdAll
+
+  if (is.null(ReferencePoint)) {
+    # Divide it by the global ratio. Ignore points with no neighbor at all.
+    Mvalues <- apply(LocalRatio, 2, function(x) sum(x[is.finite(x)])/sum(GlobalRatio[is.finite(x)]))
+    # Keep individual values
+    if (Individual) {
+      Mvalues <- cbind(Mvalues, t(LocalRatio/GlobalRatio))
+    }
+  } else {
+    # Find the reference point in the set of points of the reference type
+    ReferencePoint_index <- which(rownames(X[IsReferenceType]$marks) == ReferencePoint_name)
+    # Only keep the value of the reference point
+    Mvalues <- LocalRatio[ReferencePoint_index, ]/GlobalRatio[ReferencePoint_index]
+  }
+
+  # Put the results into an fv object
+  MEstimate <- data.frame(r, rep(1, length(r)), Mvalues)
+  ColNames <- c("r", "theo", "M")
+  Labl <- c("r", "%s[theo](r)", "hat(%s)(r)")
+  Desc <- c("Distance argument r", "Theoretical independent %s", "Estimated %s")  
+  if (Individual & is.null(ReferencePoint)) {
+    # ColNumbers will usually be line numbers of the marks df, but may be real names.
+    ColNumbers <- row.names(X$marks[IsReferenceType, ])
+    ColNames <- c(ColNames, paste("M", ColNumbers, sep="_"))
+    Labl <- c(Labl, paste("hat(%s)[", ColNumbers, "](r)", sep=""))
+    Desc <- c(Desc, paste("Individual %s around point", ColNumbers))
+  }
+  colnames(MEstimate) <- ColNames
+
+  # Make an fv object
+  M <- fv(MEstimate, argu="r", ylab=quote(M(r)), valu="M", 
+          fmla= "cbind(M,theo)~r", alim=c(0, max(r)), labl=Labl, 
+          desc=Desc, unitname=X$window$unit, fname="M")
+  fvnames(M, ".") <- ColNames[-1]
+
+
+  # Calculate the quantiles of the individual values with respect to the null hypothesis
+  if (Quantiles) {
+    # Run Monte Carlo simulations of Mhat for each reference point
+    nReferencePoints <- sum(IsReferenceType)
+    # Prepare a matrix to save quantiles
+    MQuantiles <- matrix(0, nrow = length(r), ncol = nReferencePoints)
+    colnames(MQuantiles) <- paste("M", ColNumbers, sep="_")
+    rownames(MQuantiles) <- r
+    if (verbose) ProgressBar <- utils::txtProgressBar(min = 0, max = nReferencePoints)
+    for (i in seq_len(nReferencePoints)) {
+      # Null hypothesis
+      SimulatedPP <- expression(
+        rRandomLocation(
+          X, 
+          ReferencePoint = which(X$marks$PointType == ReferenceType)[i], 
+          CheckArguments = FALSE
+        )
+      )
+      # Compute the simulations. The envelope is useless: we need the simulated values.
+      Envelope <- envelope(
+        # The value Mhat(X) is not used but the point #1 must be of ReferenceType
+        # so retain points of ReferenceType only to save time
+        X[X$marks$PointType == ReferenceType], 
+        fun = Mhat, 
+        nsim = NumberOfSimulations, 
+        # nrank may be any value because the envelope is not used
+        nrank = 1,
+        # Arguments for Mhat()
+        r = r, 
+        ReferenceType = ReferenceType, 
+        NeighborType = NeighborType, 
+        CaseControl = CaseControl, 
+        Individual = TRUE, 
+        # The reference point is always 1 after rRandomLocation()
+        ReferencePoint = 1, 
+        CheckArguments = FALSE,
+        # Arguments for envelope()
+        simulate = SimulatedPP, 
+        # Do not show the progress
+        verbose = FALSE, 
+        # Save individual simulations into attribute simfuns
+        savefuns = TRUE
+      )
+      # Get the distribution of simulated values (eliminate the "r" column). 
+      Simulations <- as.matrix(attr(Envelope, "simfuns"))[, -1]
+      # Calculate the quantiles of observed values
+      for (r_i in seq_along(r)) {
+        if (any(!is.na(Simulations[r_i, ]))) {
+          # Check that at least one simulated value is not NaN so that ecdf() works.
+          MQuantiles[r_i, i] <- stats::ecdf(Simulations[r_i, ])(Mvalues[r_i, i])
+        } else {
+          MQuantiles[r_i, i] <- NaN
+        }
+      }
+      # Progress bar
+      if (verbose) utils::setTxtProgressBar(ProgressBar, i)
+    }
+    if (verbose) close(ProgressBar)
+    # Save the quantiles as an attribute of the fv
+    attr(M, "Quantiles") <- MQuantiles
+    attr(M, "Alpha") <- Alpha
+  }
+
+  if (Individual & is.null(ReferencePoint)) {
+    # Save the reference type for future smoothing
+    attr(M, "ReferenceType") <- ReferenceType
+  }
+  return(M)
+}
diff --git a/.#Extra/Local CI/Mhat.Rd b/.#Extra/Local CI/Mhat.Rd
@@ -0,0 +1,81 @@
+\name{Mhat}
+\alias{Mhat}
+\title{
+  Estimation of the M function
+}
+\description{
+  Estimates the \emph{M} function
+}
+\usage{
+Mhat(X, r = NULL, ReferenceType, NeighborType = ReferenceType,
+    CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL,
+    Quantiles = FALSE, NumberOfSimulations = 100, Alpha = 0.05, 
+    verbose = interactive(), CheckArguments = TRUE)
+}
+\arguments{
+  \item{X}{
+  A weighted, marked planar point pattern (\code{\link{wmppp.object}}) or a \code{\link{Dtable}} object.
+  }
+  \item{r}{
+  A vector of distances. If \code{NULL}, a default value is set: 64 unequally spaced values are used up to half the maximum distance between points \eqn{d_m}. The first value is 0, first steps are small (\eqn{d_m/800}) then increase progressively up to \eqn{d_m/40}.
+  }
+  \item{ReferenceType}{
+  One of the point types.
+  }
+  \item{ReferencePoint}{
+  The index of one of the points, i.e. an integer number between 1 and the number of points of the point pattern. 
+  }
+  \item{NeighborType}{
+  One of the point types. By default, the same as reference type.
+  }
+  \item{CaseControl}{
+  Logical; if \code{TRUE}, the case-control version of \emph{M} is computed. \emph{ReferenceType} points are cases, \emph{NeighborType} points are controls.
+  }
+  \item{Individual}{
+  Logical; if \code{TRUE}, values of the function around each individual point are returned.
+  }
+  \item{Quantiles}{
+  If \code{TRUE}, Monte-Carlo simulations are run to obtain the distribution of each individual \emph{M} value under the null hypothesis of random location of points (i.e. all points except for the reference point are relocated).
+  }
+  \item{NumberOfSimulations}{
+  The number of simulations to run, 100 by default.
+  }
+  \item{Alpha}{
+  The risk level, 5\% by default.
+  }
+  \item{verbose}{
+  Logical; if \code{TRUE}, print progress reports during the simulations.
+  }
+  \item{CheckArguments}{
+  Logical; if \code{TRUE}, the function arguments are verified. Should be set to \code{FALSE} to save time in simulations for example, when the arguments have been checked elsewhere.
+  }
+}
+\details{
+  \emph{M} is a weighted, cumulative, relative measure of a point pattern structure. Its value at any distance is the ratio of neighbors of the \emph{NeighborType} to all points around \emph{ReferenceType} points, normalized by its value over the windows.
+
+  If \emph{CaseControl} is \code{TRUE}, then \emph{ReferenceType} points are cases and \emph{NeighborType} points are controls. The univariate concentration of cases is calculated as if \emph{NeighborType} was equal to \emph{ReferenceType}, but only controls are considered when counting all points around cases (Marcon et al., 2012). This makes sense when the sampling design is such that all points of \emph{ReferenceType} (the cases) but only a sample of the other points (the controls) are recorded. Then, the whole distribution of points is better represented by the controls alone.
+
+  If \emph{Individual} is \code{TRUE}, then the individual values \emph{M} in the neighborhood of each point are returned. If \emph{ReferencePoint} is also specified, then \emph{only} the individual value of M around the reference point is returned.
+}
+\value{
+  An object of class \code{fv}, see \code{\link{fv.object}}, which can be plotted directly using \code{\link{plot.fv}}.
+
+  If \code{Individual} is set to \code{TRUE}, the object also contains the value of the function around each individual \emph{ReferenceType} point taken as the only reference point. The column names of the \code{fv} are "M_" followed by the point names, i.e. the row names of the marks of the point pattern.
+}
+\references{
+  Marcon, E. and Puech, F. (2010). Measures of the Geographic Concentration of Industries: Improving Distance-Based Methods. \emph{Journal of Economic Geography} 10(5): 745-762.
+
+  Marcon, E., F. Puech and S. Traissac (2012). Characterizing the relative spatial structure of point patterns. \emph{International Journal of Ecology} 2012(Article ID 619281): 11.
+
+  Marcon, E., and Puech, F. (2017). A Typology of Distance-Based Measures of Spatial Concentration. \emph{Regional Science and Urban Economics} 62:56-67
+}
+\seealso{
+  \code{\link{MEnvelope}}, \code{\link{Kdhat}}
+}
+\examples{
+data(paracou16)
+autoplot(paracou16)
+
+# Calculate M
+autoplot(Mhat(paracou16, , "V. Americana", "Q. Rosea"))
+}
diff --git a/.#Extra/Local CI/ReadMe.txt b/.#Extra/Local CI/ReadMe.txt
@@ -0,0 +1,8 @@
+# MHat
+
+If Quantiles is TRUE, Monte-Carlo simulations are run to obtain the distribution of each individual M value under the null hypothesis of random location of points (i.e. all points except for the reference point are relocated).
+
+
+# rRandomLocation
+
+If ReferencePoint is not NULL, other points are shuffled but the reference point is kept untouched to compute the CI of its local M value.
diff --git a/.#Extra/Local CI/rRandomLocation.R b/.#Extra/Local CI/rRandomLocation.R
@@ -0,0 +1,63 @@
+rRandomLocation <-
+function(X, ReferenceType = "", ReferencePoint = NULL, CheckArguments = TRUE) {
+
+  if (CheckArguments)
+    CheckdbmssArguments()
+
+  if (inherits(X, "Dtable")) {
+    # Dtable case
+    Index <- seq_along(X$marks$PointType) 
+    if (ReferenceType != "") {
+      # Retain a single point type
+      ReferencePoints <- X$marks$PointType==ReferenceType
+      # Randomize the reference points
+      RandomizedReferences <- sample(Index[ReferencePoints])
+      # Replace randomized elements in the index
+      i <- o <- 1
+      while (i <= length(X$marks$PointType))
+      {
+        if (ReferencePoints[i]) {
+          Index[i] <- RandomizedReferences[o]
+          o <- o+1
+        }
+        i <- i+1
+      }
+    } else {
+      Index <- sample(Index)
+    }
+    # Apply the randomization to PointType and PointWeight
+    X$marks$PointType <- X$marks$PointType[Index]
+    X$marks$PointWeight <- X$marks$PointWeight[Index]
+    return(X)
+  } else {
+    # wmppp case
+    if (!is.null(ReferencePoint)) {
+      # The reference point must be < than the number of points
+      if (ReferencePoint > X$n) {
+        stop("The number of the reference point must be smaller than the number of points in the point pattern.")
+      }
+      # The reference point must belong to the reference point type
+      if (ReferenceType != "") {
+        if (X$marks$PointType[ReferencePoint] != ReferenceType) {
+          stop("The reference point must be of the reference point type.")
+        }
+      }
+      # Save the reference point
+      ReferencePoint_ppp <- X[ReferencePoint]
+      X <- X[-ReferencePoint]
+    }
+    if (ReferenceType != "") {
+      # Retain a single point type
+      X.reduced <- X[X$marks$PointType == ReferenceType]
+      RandomizedX <- rlabel(X.reduced)
+    } else {
+      RandomizedX <- rlabel(X)
+    }
+    if (!is.null(ReferencePoint)) {
+      # Restore the reference point with index 1
+      RandomizedX <- superimpose(ReferencePoint_ppp, RandomizedX)
+    }
+    class(RandomizedX) <- c("wmppp", "ppp")
+    return (RandomizedX)
+  }
+}