From 48ce7cfad0a571579d2df4912f376d493a1e8472 Mon Sep 17 00:00:00 2001 From: renanxcortes Date: Thu, 11 Jul 2019 15:54:14 -0700 Subject: [PATCH 1/2] enh: add robust and informative condition for large magnitude projections (tests only off-diagonal) --- segregation/spatial/spatial_indexes.py | 52 +++++++++++++++----------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/segregation/spatial/spatial_indexes.py b/segregation/spatial/spatial_indexes.py index b5a8bf60..4e9286ab 100644 --- a/segregation/spatial/spatial_indexes.py +++ b/segregation/spatial/spatial_indexes.py @@ -1030,13 +1030,14 @@ def _distance_decay_isolation(data, 'c_lons': c_lons })) # This needs to be latitude first! - np.fill_diagonal(dist, val=(alpha * data.area)**(beta)) c = np.exp(-dist) - - Pij = np.multiply(c, t) / np.sum(np.multiply(c, t), axis=1) - if np.isnan(Pij).sum() > 0: - raise ValueError('It not possible to determine the distance between, at least, one pair of units. This is probably due to the magnitude of the number of the centroids. We recommend to reproject the geopandas DataFrame.') + if c.sum() < 10 ** (-15): + raise ValueError('It not possible to determine accurately the exponential of the negative distances. This is probably due to the large magnitude of the centroids numbers. It is recommended to reproject the geopandas DataFrame. Also, if this is a not lat-long CRS, it is recommended to set metric to \'haversine\'') + + np.fill_diagonal(c, val = np.exp((alpha * data.area)**(beta))) + + Pij = np.multiply(c, t) / np.sum(np.multiply(c, t), axis=1) DDxPx = (np.array(x / X) * np.nansum(np.multiply(Pij, np.array(x / t)), axis=1)).sum() @@ -1261,13 +1262,14 @@ def _distance_decay_exposure(data, 'c_lons': c_lons })) # This needs to be latitude first! - np.fill_diagonal(dist, val=(alpha * data.area)**(beta)) c = np.exp(-dist) - - Pij = np.multiply(c, t) / np.sum(np.multiply(c, t), axis=1) - if np.isnan(Pij).sum() > 0: - raise ValueError('It not possible to determine the distance between, at least, one pair of units. This is probably due to the magnitude of the number of the centroids. We recommend to reproject the geopandas DataFrame.') + if c.sum() < 10 ** (-15): + raise ValueError('It not possible to determine accurately the exponential of the negative distances. This is probably due to the large magnitude of the centroids numbers. It is recommended to reproject the geopandas DataFrame. Also, if this is a not lat-long CRS, it is recommended to set metric to \'haversine\'') + + np.fill_diagonal(c, val = np.exp((alpha * data.area)**(beta))) + + Pij = np.multiply(c, t) / np.sum(np.multiply(c, t), axis=1) DDxPy = (x / X * np.nansum(np.multiply(Pij, y / t), axis=1)).sum() @@ -1489,16 +1491,17 @@ def _spatial_proximity(data, 'c_lons': c_lons })) # This needs to be latitude first! - np.fill_diagonal(dist, val=(alpha * data.area)**(beta)) c = np.exp(-dist) - + + if c.sum() < 10 ** (-15): + raise ValueError('It not possible to determine accurately the exponential of the negative distances. This is probably due to the large magnitude of the centroids numbers. It is recommended to reproject the geopandas DataFrame. Also, if this is a not lat-long CRS, it is recommended to set metric to \'haversine\'') + + np.fill_diagonal(c, val = np.exp((alpha * data.area)**(beta))) + Pxx = ((np.array(data.xi) * c).T * np.array(data.xi)).sum() / X**2 Pyy = ((np.array(data.yi) * c).T * np.array(data.yi)).sum() / Y**2 Ptt = ((np.array(data.ti) * c).T * np.array(data.ti)).sum() / T**2 SP = (X * Pxx + Y * Pyy) / (T * Ptt) - - if np.isnan(SP): - raise ValueError('It not possible to determine the distance between, at least, one pair of units. This is probably due to the magnitude of the number of the centroids. We recommend to reproject the geopandas DataFrame.') core_data = data[['group_pop_var', 'total_pop_var', 'geometry']] @@ -1714,14 +1717,15 @@ def _absolute_clustering(data, 'c_lons': c_lons })) # This needs to be latitude first! - np.fill_diagonal(dist, val=(alpha * data.area)**(beta)) c = np.exp(-dist) - + + if c.sum() < 10 ** (-15): + raise ValueError('It not possible to determine accurately the exponential of the negative distances. This is probably due to the large magnitude of the centroids numbers. It is recommended to reproject the geopandas DataFrame. Also, if this is a not lat-long CRS, it is recommended to set metric to \'haversine\'') + + np.fill_diagonal(c, val = np.exp((alpha * data.area)**(beta))) + ACL = ((((x/X) * (c * x).sum(axis = 1)).sum()) - ((X / n**2) * c.sum())) / \ ((((x/X) * (c * t).sum(axis = 1)).sum()) - ((X / n**2) * c.sum())) - - if np.isnan(ACL): - raise ValueError('It not possible to determine the distance between, at least, one pair of units. This is probably due to the magnitude of the number of the centroids. We recommend to reproject the geopandas DataFrame.') core_data = data[['group_pop_var', 'total_pop_var', 'geometry']] @@ -1927,9 +1931,13 @@ def _relative_clustering(data, 'c_lons': c_lons })) # This needs to be latitude first! - np.fill_diagonal(dist, val=(alpha * data.area)**(beta)) c = np.exp(-dist) - + + if c.sum() < 10 ** (-15): + raise ValueError('It not possible to determine accurately the exponential of the negative distances. This is probably due to the large magnitude of the centroids numbers. It is recommended to reproject the geopandas DataFrame. Also, if this is a not lat-long CRS, it is recommended to set metric to \'haversine\'') + + np.fill_diagonal(c, val = np.exp((alpha * data.area)**(beta))) + Pxx = ((np.array(data.xi) * c).T * np.array(data.xi)).sum() / X**2 Pyy = ((np.array(data.yi) * c).T * np.array(data.yi)).sum() / Y**2 RCL = Pxx / Pyy - 1 From c6df1e1ddfced58ae59529ec514fcd02af83f483 Mon Sep 17 00:00:00 2001 From: renanxcortes Date: Thu, 11 Jul 2019 16:44:41 -0700 Subject: [PATCH 2/2] bug: add negative sign on d_ii on main diagonal --- segregation/spatial/spatial_indexes.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/segregation/spatial/spatial_indexes.py b/segregation/spatial/spatial_indexes.py index 4e9286ab..ab80c03d 100644 --- a/segregation/spatial/spatial_indexes.py +++ b/segregation/spatial/spatial_indexes.py @@ -1035,7 +1035,7 @@ def _distance_decay_isolation(data, if c.sum() < 10 ** (-15): raise ValueError('It not possible to determine accurately the exponential of the negative distances. This is probably due to the large magnitude of the centroids numbers. It is recommended to reproject the geopandas DataFrame. Also, if this is a not lat-long CRS, it is recommended to set metric to \'haversine\'') - np.fill_diagonal(c, val = np.exp((alpha * data.area)**(beta))) + np.fill_diagonal(c, val = np.exp(-(alpha * data.area)**(beta))) Pij = np.multiply(c, t) / np.sum(np.multiply(c, t), axis=1) @@ -1267,7 +1267,7 @@ def _distance_decay_exposure(data, if c.sum() < 10 ** (-15): raise ValueError('It not possible to determine accurately the exponential of the negative distances. This is probably due to the large magnitude of the centroids numbers. It is recommended to reproject the geopandas DataFrame. Also, if this is a not lat-long CRS, it is recommended to set metric to \'haversine\'') - np.fill_diagonal(c, val = np.exp((alpha * data.area)**(beta))) + np.fill_diagonal(c, val = np.exp(-(alpha * data.area)**(beta))) Pij = np.multiply(c, t) / np.sum(np.multiply(c, t), axis=1) @@ -1496,7 +1496,7 @@ def _spatial_proximity(data, if c.sum() < 10 ** (-15): raise ValueError('It not possible to determine accurately the exponential of the negative distances. This is probably due to the large magnitude of the centroids numbers. It is recommended to reproject the geopandas DataFrame. Also, if this is a not lat-long CRS, it is recommended to set metric to \'haversine\'') - np.fill_diagonal(c, val = np.exp((alpha * data.area)**(beta))) + np.fill_diagonal(c, val = np.exp(-(alpha * data.area)**(beta))) Pxx = ((np.array(data.xi) * c).T * np.array(data.xi)).sum() / X**2 Pyy = ((np.array(data.yi) * c).T * np.array(data.yi)).sum() / Y**2 @@ -1722,7 +1722,7 @@ def _absolute_clustering(data, if c.sum() < 10 ** (-15): raise ValueError('It not possible to determine accurately the exponential of the negative distances. This is probably due to the large magnitude of the centroids numbers. It is recommended to reproject the geopandas DataFrame. Also, if this is a not lat-long CRS, it is recommended to set metric to \'haversine\'') - np.fill_diagonal(c, val = np.exp((alpha * data.area)**(beta))) + np.fill_diagonal(c, val = np.exp(-(alpha * data.area)**(beta))) ACL = ((((x/X) * (c * x).sum(axis = 1)).sum()) - ((X / n**2) * c.sum())) / \ ((((x/X) * (c * t).sum(axis = 1)).sum()) - ((X / n**2) * c.sum())) @@ -1936,7 +1936,7 @@ def _relative_clustering(data, if c.sum() < 10 ** (-15): raise ValueError('It not possible to determine accurately the exponential of the negative distances. This is probably due to the large magnitude of the centroids numbers. It is recommended to reproject the geopandas DataFrame. Also, if this is a not lat-long CRS, it is recommended to set metric to \'haversine\'') - np.fill_diagonal(c, val = np.exp((alpha * data.area)**(beta))) + np.fill_diagonal(c, val = np.exp(-(alpha * data.area)**(beta))) Pxx = ((np.array(data.xi) * c).T * np.array(data.xi)).sum() / X**2 Pyy = ((np.array(data.yi) * c).T * np.array(data.yi)).sum() / Y**2