From acd0e7752f7940204bc7637c5f2495340454c411 Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Fri, 10 Jun 2022 21:43:18 -0400 Subject: [PATCH 1/2] adding missing dois --- paper/paper.bib | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paper/paper.bib b/paper/paper.bib index 606a4638..32912bd2 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -98,6 +98,7 @@ @article{pysal2007 number={1}, pages={5-27}, keywords={Open Source; Software; Spatial}, + doi={10.52324/001c.8285}, url={https://rrs.scholasticahq.com/article/8285.pdf} } @@ -157,7 +158,8 @@ @article{ReVelle1970 pages={30--42}, title={{Central Facilities Location}}, volume={2}, - year={1970} + year={1970}, + doi={10.1111/j.1538-4632.1970.tb00142.x} } @article{Hakimi1964, From c5f6efc11b82b0722458f2f127e6149894a099c4 Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Sun, 12 Jun 2022 12:07:31 -0400 Subject: [PATCH 2/2] address reference concerns --- paper/paper.bib | 147 ++++++++++++++++++++++++++++++++++++++++++++---- paper/paper.md | 23 +++++--- 2 files changed, 152 insertions(+), 18 deletions(-) diff --git a/paper/paper.bib b/paper/paper.bib index 32912bd2..f75c44ac 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -1,6 +1,6 @@ @article{assunccao2006efficient, title={Efficient Regionalization Techniques for Socio-economic Geographical Units Using Minimum Spanning Trees}, - author={Assun{\c{c}}{\~a}o, Renato M and Neves, Marcos Corr{\^e}a and C{\^a}mara, Gilberto and da Costa Freitas, Corina}, + author={Assun{\c{c}}{\~a}o, Renato M. 
and Neves, Marcos Corr{\^e}a and C{\^a}mara, Gilberto and da Costa Freitas, Corina}, journal={International Journal of Geographical Information Science}, volume={20}, number={7}, @@ -26,7 +26,7 @@ @article{duque2012max @article{koopmans1949optimum, title={Optimum utilization of the transportation system}, - author={Koopmans, Tjalling C}, + author={Koopmans, Tjalling C.}, journal={Econometrica: Journal of the Econometric Society}, pages={136--146}, year={1949}, @@ -36,7 +36,7 @@ @article{koopmans1949optimum @article{miller1960integer, title={Integer programming formulation of traveling salesman problems}, - author={Miller, Clair E and Tucker, Albert W and Zemlin, Richard A}, + author={Miller, Clair E. and Tucker, Albert W. and Zemlin, Richard A.}, journal={Journal of the ACM (JACM)}, volume={7}, number={4}, @@ -59,7 +59,7 @@ @article{murray2021contemporary @article{wei2020efficient, title={Efficient Regionalization for Spatially Explicit Neighborhood Delineation}, - author={Wei, Ran and Rey, Sergio and Knaap, Elijah}, + author={Wei, Ran and Rey, Sergio J. and Knaap, Elijah}, journal={International Journal of Geographical Information Science}, pages={1--17}, year={2020}, @@ -90,7 +90,7 @@ @article{openshaw1995algorithms } @article{pysal2007, - author={Sergio Rey and Luc Anselin}, + author={Sergio J. Rey and Luc Anselin}, title={{PySAL: A Python Library of Spatial Analytical Methods}}, journal={The Review of Regional Studies}, year=2007, @@ -104,7 +104,7 @@ @article{pysal2007 @article{rey2015open, title={Open geospatial analytics with PySAL}, - author={Rey, Sergio J and Anselin, Luc and Li, Xun and Pahle, Robert and Laura, Jason and Li, Wenwen and Koschinsky, Julia}, + author={Rey, Sergio J. 
and Anselin, Luc and Li, Xun and Pahle, Robert and Laura, Jason and Li, Wenwen and Koschinsky, Julia}, journal={ISPRS International Journal of Geo-Information}, volume={4}, number={2}, @@ -125,13 +125,13 @@ @article{Rey2021 @article{wolf2020, title={Spatially-encouraged spectral clustering: a technique for blending map typologies and regionalization}, - author={Wolf, Levi}, + author={Wolf, Levi J.}, url= {https://osf.io/yzt2p/}, doi={10.31219/osf.io/yzt2p} } @article{Toregas1971, - author={Toregas, Constantine and Swain, R. and ReVelle, C. S. and Bergman, L.}, + author={Toregas, Constantine and Swain, R. W. and ReVelle, C. S. and Bergman, L.}, doi={10.1287/opre.19.6.1363}, journal={Operations Research}, number={6}, @@ -152,7 +152,7 @@ @article{Church1974 } @article{ReVelle1970, - author={ReVelle, C. S. and Swain, R.W.}, + author={ReVelle, C. S. and Swain, R. W.}, journal={Geographical Analysis}, number={1}, pages={30--42}, @@ -182,4 +182,131 @@ @misc{spopt2021 url = {https://github.com/pysal/spopt}, doi = {10.5281/zenodo.4444156}, keywords = {python,regionalization,spatial-optimization,location-modeling} -} \ No newline at end of file +} + +@article{mitchell2011pulp, + title={{PuLP: A Linear Programming Toolkit for Python}}, + author={Mitchell, Stuart and O'Sullivan, Michael and Dunning, Iain}, + journal={The University of Auckland, Auckland, New Zealand}, + volume={65}, + year={2011} +} + +@article{badr2015tool, + title={A tool for hierarchical climate regionalization}, + author={Badr, Hamada S. and Zaitchik, Benjamin F. 
and Dezfuli, Amin K.}, + journal={Earth Science Informatics}, + volume={8}, + number={4}, + pages={949--958}, + year={2015}, + publisher={Springer}, + doi={10.1007/s12145-015-0221-7} +} + +@article{LEMUSCANOVAS2019114, + title = {{synoptReg}: An {R} package for computing a synoptic climate classification and a spatial regionalization of environmental data}, + journal = {Environmental Modelling \& Software}, + volume = {118}, + pages = {114--119}, + year = {2019}, + issn = {1364-8152}, + doi = {10.1016/j.envsoft.2019.04.006}, + url = {https://www.sciencedirect.com/science/article/pii/S1364815219300064}, + author = {Marc Lemus-Canovas and Joan A. Lopez-Bustins and Javier Martin-Vide and Dominic Royé}, + keywords = {Alps, Environmental regionalization, R package, synoptReg, Synoptic classification}, +} + +@article{Chen2021, + title = {{Open-Source Approaches for Location Cover Models: Capabilities and Efficiency}}, + journal = {Journal of Geographical Systems}, + volume = {23}, + pages = {361--380}, + year = {2021}, + issn = {1435-5949}, + doi = {10.1007/s10109-021-00350-w}, + author = {Chen, Huanfa and Murray, Alan T. and Jiang, Rui}, +} + +@article{daru2020phyloregion, + title={{phyloregion: R package for biogeographical regionalization and macroecology}}, + author={Daru, Barnabas H. and Karunarathne, Piyal and Schliep, Klaus}, + journal={Methods in Ecology and Evolution}, + volume={11}, + number={11}, + pages={1483--1491}, + year={2020}, + doi={10.1111/2041-210X.13478} +} + +@article{regioneR2015, + author = {Gel, Bernat and Díez-Villanueva, Anna and Serra, Eduard and Buschbeck, Marcus and Peinado, Miguel A. 
and Malinverni, Roberto}, + title = {{regioneR: an R/Bioconductor package for the association analysis of genomic regions based on permutation tests}}, + journal = {Bioinformatics}, + volume = {32}, + number = {2}, + pages = {289--291}, + year = {2015}, + month = {09}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btv562}, +} + +@misc{nsRFA, + author = {Viglione, A.}, + year = {2009}, + title = {{nsRFA: Non-supervised Regional Frequency Analysis}}, + url = {http://www.CRAN.R-project.org/package=nsRFA}, + howpublished = {\url{http://www.CRAN.R-project.org/package=nsRFA}}, +} + +@Manual{rgeoda, + title = {rgeoda: R Library for Spatial Data Analysis}, + author = {Xun Li and Luc Anselin}, + year = {2022}, + note = {\url{https://github.com/geodacenter/rgeoda/}, \url{https://geodacenter.github.io/rgeoda/}}, +} + +@article{rahman2018sdpt3r, + author = {Rahman, Adam}, + year = {2019}, + month = {01}, + pages = {371}, + title = {{sdpt3r: Semidefinite Quadratic Linear Programming in R}}, + volume = {10}, + journal = {The R Journal}, + doi = {10.32614/RJ-2018-063} +} + +@misc{RBGL, + author = {Carey, V. and Long, L. 
and Gentleman, R.}, + year = {2022}, + title = {{RBGL: An interface to the BOOST graph library}}, + url = {https://www.bioconductor.org/packages/release/bioc/html/RBGL.html}, + howpublished = {\url{https://www.bioconductor.org/packages/release/bioc/html/RBGL.html}}, +} + +@misc{pyspatialopt, + author = {Pulver, Aaron}, + year = {2016}, + title = {{pyspatialopt: A library to create, solve, and analyze spatial optimization problems}}, + url = {https://github.com/apulverizer/pyspatialopt}, + howpublished = {\url{https://github.com/apulverizer/pyspatialopt}}, +} + +@misc{allagash, + author = {Pulver, Aaron}, + year = {2019}, + title = {{allagash: A spatial optimization library for covering problems}}, + url = {https://github.com/apulverizer/allagash}, + howpublished = {\url{https://github.com/apulverizer/allagash}}, +} + +@misc{maxcovr, + author = {Tierney, N.}, + year = {2019}, + title = {{maxcovr: A Set of Tools for Solving the Maximal Covering Location Problem}}, + url = {https://github.com/njtierney/maxcovr}, + howpublished = {\url{https://github.com/njtierney/maxcovr}}, +} + diff --git a/paper/paper.md b/paper/paper.md index d14ea931..99fbfa92 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -50,17 +50,24 @@ bibliography: paper.bib # Summary -Spatial optimization is a major spatial analytical tool in management and planning, the significance of which cannot be overstated. Spatial optimization models play an important role in designing and managing effective and efficient service systems such as transportation, education, public health, environmental protection, and commercial investment among others. To this end, spopt (\textbf{sp}atial \textbf{opt}imization) is under active development for the inclusion of newly proposed models and methods for regionalization, facility location, and transportation-oriented solutions [@spopt2021]. Spopt is a submodule in the open-source spatial analysis library PySAL (Python Spatial Analysis Library) founded by Dr. 
Serge Rey and Dr. Luc Anselin in 2005 [@pysal2007;@rey2015open;@Rey2021]. The goal of developing spopt is to provide management and decision-making support to all relevant practitioners and to further promote the appropriate and meaningful application of spatial optimization models in practice. +Spatial optimization is a major spatial analytical tool in management and planning, the significance of which cannot be overstated. Spatial optimization models play an important role in designing and managing effective and efficient service systems such as transportation, education, public health, environmental protection, and commercial investment among others. To this end, **spopt** (\textbf{sp}atial \textbf{opt}imization) is under active development for the inclusion of newly proposed models and methods for regionalization, facility location, and transportation-oriented solutions [@spopt2021]. **Spopt** is a submodule in the open-source spatial analysis library PySAL (Python Spatial Analysis Library) founded by Dr. Sergio J. Rey and Dr. Luc Anselin in 2005 [@pysal2007;@rey2015open;@Rey2021]. The goal of developing **spopt** is to provide management and decision-making support to all relevant practitioners and to further promote the appropriate and meaningful application of spatial optimization models in practice. # Statement of need -Spatial optimization methods/algorithms can be accessed in many ways. ArcGIS (https://www.esri.com/en-us/home) and TransCAD (https://www.caliper.com/) are two well-known commercial GIS software packages that provide modules designed for structuring and solving spatial optimization problems. The optimization functions they offer focus on a set classical single facility location methods (e.g., Weber, Median, Centroid, 1-center), routing and shortest path methods (e.g., shortest path on the network, least cost path over the terrain), and multi-facility location-allocation methods (e.g., coverage models, p-median problem). 
They are user-friendly and visually appealing, but the cost is relatively high [@murray2021contemporary]. +Spatial optimization methods/algorithms can be accessed in many ways. ArcGIS[^arc] and TransCAD[^transcad] are two well-known commercial GIS software packages that provide modules designed for structuring and solving spatial optimization problems. The optimization functions they offer focus on a set of classical single facility location methods (e.g., Weber, Median, Centroid, 1-center), routing and shortest path methods (e.g., shortest path on the network, least cost path over the terrain), and multi-facility location-allocation methods (e.g., coverage models, p-median problem). They are user-friendly and visually appealing, but the cost is relatively high [@murray2021contemporary]. -Open-source software is another option to access spatial optimization. Although it may require users to have a certain level of programming experience, open-source software provides relatively novel and comprehensive methods, and more importantly, it is free and can be easily replicated. This is particularly true for regionalization and facility-location methods. Regionalization methods are limited in commercial GIS software, and may only have grouping analysis for vector data and region identification for raster data. On the contrary, there are many application-oriented open-source packages that facilitate the implementation of regionalization methods in various fields, including climate (e.g., HiClimR (https://cran.r-project.org/web/packages/HiClimR/index.html), synoptReg (https://cran.r-project.org/web/packages/synoptReg/index.html)), biography (e.g., Phyloregion (https://cran.r-project.org/web/packages/phyloregion/index.html), regioneR (http://bioconductor.org/packages/release/bioc/html/regioneR.html)), hydrology (e.g., nsRFA(https://cran.r-project.org/web/packages/nsRFA/index.html)), agricultural (e.g., OpenLCA (https://www.openlca.org/)), and so on. 
The functions of graph regionalization with clustering and partitioning have been provided by several packages such as Rgeoda, maxcut: Max-Cut Problem, RBGL: R Boost Graph Library, and grPartition. They are probably the most closely related projects to the regionalization section of spopt, however, they are written in R and MATLAB. For facility-location methods, commercial software such as TransCAD and ArcGIS implements models using a heuristic approach. However, they don't provide details about the solution found, which limits the interpretability of the results (Chen et al., 2021). On the other hand, existing open-source packages mostly aim at solving coverage problems such as PySpatialOpt (https://github.com/apulverizer/pyspatialopt), Allagash (https://apulverizer.github.io/allagash/) and maxcovr (https://github.com/njtierney/maxcovr), but the available models, solvers, and overall accessibility vary significantly. Therefore, it is necessary to develop an open-source optimization package written in Python that includes various types of classic facility-location methods with a wide range of supported optimization solvers. +[^arc]: https://www.esri.com/en-us/home +[^transcad]: https://www.caliper.com/ + +Open-source software is another option to access spatial optimization. Although it may require users to have a certain level of programming experience, open-source software provides relatively novel and comprehensive methods, and more importantly, it is free and can be easily replicated. This is particularly true for regionalization and facility-location methods. Regionalization methods are limited in commercial GIS software, and may only have grouping analysis for vector data and region identification for raster data. 
On the contrary, there are many application-oriented open-source packages that facilitate the implementation of regionalization methods in various fields, including climate (e.g., HiClimR [@badr2015tool], synoptReg [@LEMUSCANOVAS2019114]), biology (e.g., Phyloregion [@daru2020phyloregion], regioneR [@regioneR2015]), hydrology (e.g., nsRFA [@nsRFA]), agriculture (e.g., OpenLCA[^openlca]), and so on. The functions of graph regionalization with clustering and partitioning have been provided by several packages in R[^R] such as Rgeoda [@rgeoda], maxcut: Max-Cut Problem in sdpt3r [@rahman2018sdpt3r], and RBGL: R Boost Graph Library [@RBGL], and grPartition within MATLAB[^matlab]. They are probably the most closely related projects to the regionalization functionality of **spopt**; however, they are written in R and MATLAB. For facility-location methods, commercial software such as TransCAD and ArcGIS implements models using a heuristic approach. However, they don't provide details about the solution found, which limits the interpretability of the results [@Chen2021]. On the other hand, existing open-source packages mostly aim at solving coverage problems such as pyspatialopt [@pyspatialopt], allagash [@allagash] and maxcovr [@maxcovr], but the available models, solvers, and overall accessibility vary significantly. Therefore, it is necessary to develop an open-source optimization package written in Python that includes various types of classic facility-location methods with a wide range of supported optimization solvers. + +[^openlca]: https://www.openlca.org +[^R]: https://www.r-project.org +[^matlab]: https://www.mathworks.com/products/matlab.html # Current functionality -Originating from the region module in PySAL, spopt is under active development for the inclusion of newly proposed models and methods for regionalization and facility location. 
Regarding regionalization, six models are developed for aggregating a large set of geographic units (with small footprints) into a smaller number of regions (with large footprints). They are: +Originating from the region module in PySAL, **spopt** is under active development for the inclusion of newly proposed models and methods for regionalization and facility location. Regarding regionalization, six models are developed for aggregating a large set of geographic units (with small footprints) into a smaller number of regions (with large footprints). They are: 1. Max-p-regions: the clustering of a set of geographic areas into the maximum number of homogeneous and spatially contiguous regions such that the value of a spatially extensive regional attribute is above a predefined threshold [@duque2012max;@wei2020efficient]. 2. Spatially-encouraged spectral clustering (spenc): an algorithm to balance spatial and feature coherence using kernel combination in spectral clustering [@wolf2020]. @@ -94,7 +101,7 @@ It results in five regions, three of which have six states, and two with seven s ![The solution of Max-p-regions when 32 Mexican states are clustered into the maximum number of regions such that each region has at least 6 states and homogeneity in per capita gross domestic product in 2000 is maximized.\label{fig: maxp}](figs/mexico_maxp.png) -For facility-location, four models, including two coverage models and two location-allocation models based on median and center problems, are developed using an exact approach. +For facility-location, four models, including two coverage models and two location-allocation models based on median and center problems, are developed using an exact approach with pulp [@mitchell2011pulp] providing an interface to installed solvers. 1. Location Set Covering Problem (LSCP): Finding the minimum number of facilities and their locations such that all demands are covered within the maximal distance or time standard [@Toregas1971]. 2. 
Maximal Covering Location Problem (MCLP): Locating a prespecified number of facilities such that demand coverage within a maximal service distance or time is maximized [@Church1974]. @@ -128,21 +135,21 @@ demand_points_gdf = geopandas.GeoDataFrame( ).sort_values(by=["NAME"]).reset_index() # plot results n_facilities, title = facility_points_gdf.shape[0], f"MCLP ($p$={p})" -#plot_results(mclp, facility_points_gdf, demand_points_gdf, n_facilities, title) +plot_results(mclp, facility_points_gdf, demand_points_gdf, n_facilities, title) ``` ![The solution of MCLP while siting 4 facilities using 5 kilometers as the maximum service distance between facilities and demand locations. See the "Real World Facility Location" tutorial (https://pysal.org/spopt/notebooks/facloc-real-world.html) for more details.\label{fig: mclp}](figs/mclp.png) # Planned Enhancements -Spopt is under active development and the spopt developers look forward to your extensive attention and participation. In the near future, there are three major enhancements we plan to pursue for spopt: +**Spopt** is under active development and the developers look forward to your extensive attention and participation. In the near future, there are three major enhancements we plan to pursue for **spopt**: 1. The first stream will be on the enhancement of regionalization algorithms by including several novel extensions of the classical regionalization models, such as the integration of spatial data uncertainty and the shape of identified regions in the max-p-regions problem. 2. The second direction involves adding capacity constraints and includes a polygon partial coverage on facility location models. No commercial and open-source software has provided these features before. 3. We anticipate adding functionality for solving traditional routing and transportation-oriented optimization problems. 
Initially, this will come in the form of integer programming formulations of the Travelling Salesperson Problem [@miller1960integer] and the Transportation Problem [@koopmans1949optimum]. # Acknowledgements -We would like to thank all the contributors to this package. Besides, we would like to extend our gratitude to all the users for inspiring and questioning this package to make it better. Spopt development was partially supported by National Science Foundation Award #1831615 RIDIR: Scalable Geospatial Analytics for Social Science Research. +We would like to thank all the contributors to this package. Besides, we would like to extend our gratitude to all the users for inspiring and questioning this package to make it better. **Spopt** development was partially supported by National Science Foundation Award #1831615 RIDIR: Scalable Geospatial Analytics for Social Science Research. The following acknowledgement applies to James D. Gaboardi (affiliation 4) *only*: