Skip to content

Commit

Permalink
Merge pull request #2 from EricMarcon/JSPE
Browse files Browse the repository at this point in the history
JSPE
  • Loading branch information
EricMarcon authored Dec 11, 2023
2 parents a814b98 + e580268 commit 93c097c
Show file tree
Hide file tree
Showing 22 changed files with 1,539 additions and 565 deletions.
194 changes: 194 additions & 0 deletions .#Extra/Local CI/Mhat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
Mhat <-
function(X, r = NULL, ReferenceType, NeighborType = ReferenceType,
CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL,
Quantiles = FALSE, NumberOfSimulations = 100, Alpha = 0.05,
verbose = interactive(), CheckArguments = TRUE)
{
# Eliminate erroneous configurations
if (CheckArguments) {
CheckdbmssArguments()
if (CaseControl & (ReferenceType==NeighborType)) {
warning("Cases and controls are identical.")
return(rep(1,length(r)))
}
if (Quantiles & !Individual)
stop("Quantiles can't be TRUE if Individual is FALSE.")
}

# Default r values: 64 values up to half the max distance
if (is.null(r)) {
if (inherits(X, "Dtable")) {
# Dtable case
rMax <- max(X$Dmatrix)
} else {
# wmppp case
rMax <- diameter(X$window)
}
r <- rMax*c(0, 1:20, seq(22, 40, 2), seq(45, 100,5), seq(110, 200, 10), seq(220, 400, 20))/800
}

# Vectors to recognize point types
IsReferenceType <- X$marks$PointType==ReferenceType
IsNeighborType <- X$marks$PointType==NeighborType

if (!is.null(ReferencePoint)) {
# Set individual to TRUE if a refernce point is given
Individual <- TRUE
if (IsReferenceType[ReferencePoint]) {
# Remember the name of the reference point in the dataset of reference type
ReferencePoint_name <- row.names(X$marks[ReferencePoint, ])
} else {
# The reference point must be in the reference type
stop("The reference point must be of the reference point type.")
}
}

# Global ratio
if (ReferenceType==NeighborType | CaseControl) {
WrMinusReferencePoint <- sum(X$marks$PointWeight[IsReferenceType])-X$marks$PointWeight
Wn <- WrMinusReferencePoint[IsReferenceType]
} else {
Wn <- sum(X$marks$PointWeight[IsNeighborType])
}
if (CaseControl) {
Wa <- sum(X$marks$PointWeight[IsNeighborType])
} else {
WaMinusReferencePoint <- sum(X$marks$PointWeight)-X$marks$PointWeight
Wa <- WaMinusReferencePoint[IsReferenceType]
}
GlobalRatio <- Wn/Wa

Nr <- length(r)
# Neighborhoods (i.e. all neighbors of a point less than a distance apart)
# Store weights of neighbors of interest in first Nr columns, all points from Nr+1 to 2*Nr

# Call C routine to fill Nbd
if (CaseControl) {
if (inherits(X, "Dtable")) {
# Dtable case
Nbd <- parallelCountNbdDtCC(r, X$Dmatrix, X$marks$PointWeight, IsReferenceType, IsNeighborType)
} else {
# wmppp case
Nbd <- parallelCountNbdCC(r, X$x, X$y, X$marks$PointWeight, IsReferenceType, IsNeighborType)
}
} else {
if (inherits(X, "Dtable")) {
# Dtable case
Nbd <- parallelCountNbdDt(r, X$Dmatrix, X$marks$PointWeight, IsReferenceType, IsNeighborType)
} else {
# wmppp case
Nbd <- parallelCountNbd(r, X$x, X$y, X$marks$PointWeight, IsReferenceType, IsNeighborType)
}
}

# Cumulate weights up to each distance
NbdInt <- t(apply(Nbd[, seq_len(Nr)], 1, cumsum))
NbdAll <- t(apply(Nbd[, (Nr + 1):(2 * Nr)], 1, cumsum))

# Calculate the ratio of points of interest around each point
LocalRatio <- NbdInt/NbdAll

if (is.null(ReferencePoint)) {
# Divide it by the global ratio. Ignore points with no neighbor at all.
Mvalues <- apply(LocalRatio, 2, function(x) sum(x[is.finite(x)])/sum(GlobalRatio[is.finite(x)]))
# Keep individual values
if (Individual) {
Mvalues <- cbind(Mvalues, t(LocalRatio/GlobalRatio))
}
} else {
# Find the reference point in the set of points of the reference type
ReferencePoint_index <- which(rownames(X[IsReferenceType]$marks) == ReferencePoint_name)
# Only keep the value of the reference point
Mvalues <- LocalRatio[ReferencePoint_index, ]/GlobalRatio[ReferencePoint_index]
}

# Put the results into an fv object
MEstimate <- data.frame(r, rep(1, length(r)), Mvalues)
ColNames <- c("r", "theo", "M")
Labl <- c("r", "%s[theo](r)", "hat(%s)(r)")
Desc <- c("Distance argument r", "Theoretical independent %s", "Estimated %s")
if (Individual & is.null(ReferencePoint)) {
# ColNumbers will usually be line numbers of the marks df, but may be real names.
ColNumbers <- row.names(X$marks[IsReferenceType, ])
ColNames <- c(ColNames, paste("M", ColNumbers, sep="_"))
Labl <- c(Labl, paste("hat(%s)[", ColNumbers, "](r)", sep=""))
Desc <- c(Desc, paste("Individual %s around point", ColNumbers))
}
colnames(MEstimate) <- ColNames

# Make an fv object
M <- fv(MEstimate, argu="r", ylab=quote(M(r)), valu="M",
fmla= "cbind(M,theo)~r", alim=c(0, max(r)), labl=Labl,
desc=Desc, unitname=X$window$unit, fname="M")
fvnames(M, ".") <- ColNames[-1]


# Calculate the quantiles of the individual values with respect to the null hypothesis
if (Quantiles) {
# Run Monte Carlo simulations of Mhat for each reference point
nReferencePoints <- sum(IsReferenceType)
# Prepare a matrix to save quantiles
MQuantiles <- matrix(0, nrow = length(r), ncol = nReferencePoints)
colnames(MQuantiles) <- paste("M", ColNumbers, sep="_")
rownames(MQuantiles) <- r
if (verbose) ProgressBar <- utils::txtProgressBar(min = 0, max = nReferencePoints)
for (i in seq_len(nReferencePoints)) {
# Null hypothesis
SimulatedPP <- expression(
rRandomLocation(
X,
ReferencePoint = which(X$marks$PointType == ReferenceType)[i],
CheckArguments = FALSE
)
)
# Compute the simulations. The envelope is useless: we need the simulated values.
Envelope <- envelope(
# The value Mhat(X) is not used but the point #1 must be of ReferenceType
# so retain points of ReferenceType only to save time
X[X$marks$PointType == ReferenceType],
fun = Mhat,
nsim = NumberOfSimulations,
# nrank may be any value because the envelope is not used
nrank = 1,
# Arguments for Mhat()
r = r,
ReferenceType = ReferenceType,
NeighborType = NeighborType,
CaseControl = CaseControl,
Individual = TRUE,
# The reference point is always 1 after rRandomLocation()
ReferencePoint = 1,
CheckArguments = FALSE,
# Arguments for envelope()
simulate = SimulatedPP,
# Do not show the progress
verbose = FALSE,
# Save individual simulations into attribute simfuns
savefuns = TRUE
)
# Get the distribution of simulated values (eliminate the "r" column).
Simulations <- as.matrix(attr(Envelope, "simfuns"))[, -1]
# Calculate the quantiles of observed values
for (r_i in seq_along(r)) {
if (any(!is.na(Simulations[r_i, ]))) {
# Check that at least one simulated value is not NaN so that ecdf() works.
MQuantiles[r_i, i] <- stats::ecdf(Simulations[r_i, ])(Mvalues[r_i, i])
} else {
MQuantiles[r_i, i] <- NaN
}
}
# Progress bar
if (verbose) utils::setTxtProgressBar(ProgressBar, i)
}
if (verbose) close(ProgressBar)
# Save the quantiles as an attribute of the fv
attr(M, "Quantiles") <- MQuantiles
attr(M, "Alpha") <- Alpha
}

if (Individual & is.null(ReferencePoint)) {
# Save the reference type for future smoothing
attr(M, "ReferenceType") <- ReferenceType
}
return(M)
}
81 changes: 81 additions & 0 deletions .#Extra/Local CI/Mhat.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
\name{Mhat}
\alias{Mhat}
\title{
Estimation of the M function
}
\description{
Estimates the \emph{M} function
}
\usage{
Mhat(X, r = NULL, ReferenceType, NeighborType = ReferenceType,
CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL,
Quantiles = FALSE, NumberOfSimulations = 100, Alpha = 0.05,
verbose = interactive(), CheckArguments = TRUE)
}
\arguments{
\item{X}{
A weighted, marked planar point pattern (\code{\link{wmppp.object}}) or a \code{\link{Dtable}} object.
}
\item{r}{
A vector of distances. If \code{NULL}, a default value is set: 64 unequally spaced values are used up to half the maximum distance between points \eqn{d_m}. The first value is 0, first steps are small (\eqn{d_m/800}) then increase progressively up to \eqn{d_m/40}.
}
\item{ReferenceType}{
One of the point types.
}
\item{ReferencePoint}{
The index of one of the points, i.e. an integer number between 1 and the number of points of the point pattern.
}
\item{NeighborType}{
One of the point types. By default, the same as reference type.
}
\item{CaseControl}{
Logical; if \code{TRUE}, the case-control version of \emph{M} is computed. \emph{ReferenceType} points are cases, \emph{NeighborType} points are controls.
}
\item{Individual}{
Logical; if \code{TRUE}, values of the function around each individual point are returned.
}
\item{Quantiles}{
If \code{TRUE}, Monte-Carlo simulations are run to obtain the distribution of each individual \emph{M} value under the null hypothesis of random location of points (i.e. all points except for the reference point are relocated).
}
\item{NumberOfSimulations}{
The number of simulations to run, 100 by default.
}
\item{Alpha}{
The risk level, 5\% by default.
}
\item{verbose}{
Logical; if \code{TRUE}, print progress reports during the simulations.
}
\item{CheckArguments}{
Logical; if \code{TRUE}, the function arguments are verified. Should be set to \code{FALSE} to save time in simulations for example, when the arguments have been checked elsewhere.
}
}
\details{
\emph{M} is a weighted, cumulative, relative measure of a point pattern structure. Its value at any distance is the ratio of neighbors of the \emph{NeighborType} to all points around \emph{ReferenceType} points, normalized by its value over the windows.

If \emph{CaseControl} is \code{TRUE}, then \emph{ReferenceType} points are cases and \emph{NeighborType} points are controls. The univariate concentration of cases is calculated as if \emph{NeighborType} was equal to \emph{ReferenceType}, but only controls are considered when counting all points around cases (Marcon et al., 2012). This makes sense when the sampling design is such that all points of \emph{ReferenceType} (the cases) but only a sample of the other points (the controls) are recorded. Then, the whole distribution of points is better represented by the controls alone.

If \emph{Individual} is \code{TRUE}, then the individual values \emph{M} in the neighborhood of each point are returned. If \emph{ReferencePoint} is also specified, then \emph{only} the individual value of M around the reference point is returned.
}
\value{
An object of class \code{fv}, see \code{\link{fv.object}}, which can be plotted directly using \code{\link{plot.fv}}.

If \code{Individual} is set to \code{TRUE}, the object also contains the value of the function around each individual \emph{ReferenceType} point taken as the only reference point. The column names of the \code{fv} are "M_" followed by the point names, i.e. the row names of the marks of the point pattern.
}
\references{
Marcon, E. and Puech, F. (2010). Measures of the Geographic Concentration of Industries: Improving Distance-Based Methods. \emph{Journal of Economic Geography} 10(5): 745-762.

Marcon, E., F. Puech and S. Traissac (2012). Characterizing the relative spatial structure of point patterns. \emph{International Journal of Ecology} 2012(Article ID 619281): 11.

Marcon, E., and Puech, F. (2017). A Typology of Distance-Based Measures of Spatial Concentration. \emph{Regional Science and Urban Economics} 62:56-67
}
\seealso{
\code{\link{MEnvelope}}, \code{\link{Kdhat}}
}
\examples{
data(paracou16)
autoplot(paracou16)

# Calculate M
autoplot(Mhat(paracou16, , "V. Americana", "Q. Rosea"))
}
8 changes: 8 additions & 0 deletions .#Extra/Local CI/ReadMe.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# MHat

If Quantiles is TRUE, Monte-Carlo simulations are run to obtain the distribution of each individual M value under the null hypothesis of random location of points (i.e. all points except for the reference point are relocated).


# rRandomLocation

If ReferencePoint is not NULL, other points are shuffled but the reference point is kept untouched to compute the CI of its local M value.
63 changes: 63 additions & 0 deletions .#Extra/Local CI/rRandomLocation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
rRandomLocation <-
function(X, ReferenceType = "", ReferencePoint = NULL, CheckArguments = TRUE) {

if (CheckArguments)
CheckdbmssArguments()

if (inherits(X, "Dtable")) {
# Dtable case
Index <- seq_along(X$marks$PointType)
if (ReferenceType != "") {
# Retain a single point type
ReferencePoints <- X$marks$PointType==ReferenceType
# Randomize the reference points
RandomizedReferences <- sample(Index[ReferencePoints])
# Replace randomized elements in the index
i <- o <- 1
while (i <= length(X$marks$PointType))
{
if (ReferencePoints[i]) {
Index[i] <- RandomizedReferences[o]
o <- o+1
}
i <- i+1
}
} else {
Index <- sample(Index)
}
# Apply the randomization to PointType and PointWeight
X$marks$PointType <- X$marks$PointType[Index]
X$marks$PointWeight <- X$marks$PointWeight[Index]
return(X)
} else {
# wmppp case
if (!is.null(ReferencePoint)) {
# The reference point must be < than the number of points
if (ReferencePoint > X$n) {
stop("The number of the reference point must be smaller than the number of points in the point pattern.")
}
# The reference point must belong to the reference point type
if (ReferenceType != "") {
if (X$marks$PointType[ReferencePoint] != ReferenceType) {
stop("The reference point must be of the reference point type.")
}
}
# Save the reference point
ReferencePoint_ppp <- X[ReferencePoint]
X <- X[-ReferencePoint]
}
if (ReferenceType != "") {
# Retain a single point type
X.reduced <- X[X$marks$PointType == ReferenceType]
RandomizedX <- rlabel(X.reduced)
} else {
RandomizedX <- rlabel(X)
}
if (!is.null(ReferencePoint)) {
# Restore the reference point with index 1
RandomizedX <- superimpose(ReferencePoint_ppp, RandomizedX)
}
class(RandomizedX) <- c("wmppp", "ppp")
return (RandomizedX)
}
}
Loading

0 comments on commit 93c097c

Please sign in to comment.