Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lebovits/issu946 add backup vacant land data #960

Merged
merged 9 commits into from
Oct 18, 2024
1 change: 1 addition & 0 deletions data/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ services:
- CLEAN_GREEN_GOOGLE_KEY
- PYTHONUNBUFFERED=1
- GOOGLE_CLOUD_BUCKET_NAME
- GOOGLE_CLOUD_PROJECT
- CAGP_SLACK_API_TOKEN
volumes:
- ./src:/usr/src/app
Expand Down
3 changes: 2 additions & 1 deletion data/src/classes/featurelayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ def google_cloud_bucket() -> Bucket:

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
bucket_name = os.getenv("GOOGLE_CLOUD_BUCKET_NAME", "cleanandgreenphl")
project_name = os.getenv("GOOGLE_CLOUD_PROJECT", "clean-and-green-philly")

storage_client = storage.Client(project="clean-and-green-philly")
storage_client = storage.Client(project=project_name)
return storage_client.bucket(bucket_name)


Expand Down
1 change: 1 addition & 0 deletions data/src/data_utils/access_process.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Any


def access_process(dataset: Any) -> Any:
"""
Process a dataset to determine the access process for each property based on
Expand Down
123 changes: 4 additions & 119 deletions data/src/data_utils/drug_crimes.py
Original file line number Diff line number Diff line change
@@ -1,125 +1,10 @@
import mapclassify
import matplotlib.pyplot as plt
import numpy as np
import rasterio
from awkde.awkde import GaussianKDE
from classes.featurelayer import FeatureLayer
from config.config import USE_CRS
from constants.services import DRUGCRIME_SQL_QUERY
from rasterio.transform import Affine


def drug_crimes(primary_featurelayer):
# Initialize drug_crimes object
drug_crimes = FeatureLayer(
name="Drug Crimes", carto_sql_queries=DRUGCRIME_SQL_QUERY
)

# Extract x, y coordinates from geometry
x = np.array([])
y = np.array([])

for geom in drug_crimes.gdf.geometry:
coords = np.array(geom.xy)
x = np.concatenate([x, coords[0]])
y = np.concatenate([y, coords[1]])

# Prepare data for KDE
X = np.array(list(zip(x, y)))

# Generate grid for plotting
grid_length = 2500

x_grid, y_grid = (
np.linspace(x.min(), x.max(), grid_length),
np.linspace(y.min(), y.max(), grid_length),
)
xx, yy = np.meshgrid(x_grid, y_grid)
grid_points = np.array([xx.ravel(), yy.ravel()]).T

# Compute adaptive KDE values
print("fitting KDE for drug crime data")
kde = GaussianKDE(glob_bw=0.1, alpha=0.999, diag_cov=True)
kde.fit(X)

z = kde.predict(grid_points)
zz = z.reshape(xx.shape)

# Calculate resolutions and min values
x_res = (x.max() - x.min()) / (len(x_grid) - 1)
y_res = (y.max() - y.min()) / (len(y_grid) - 1)
min_x, min_y = x.min(), y.min()

# Save the plot in tmp folder
plt.pcolormesh(xx, yy, zz)
plt.scatter(x, y, c="red", s=0.005)
plt.colorbar()
plt.savefig("tmp/kde.png")

# Define the affine transform
transform = Affine.translation(min_x, min_y) * Affine.scale(x_res, y_res)

# Export as raster
with rasterio.open(
"tmp/drug_crimes.tif",
"w",
driver="GTiff",
height=zz.shape[0],
width=zz.shape[1],
count=1,
dtype=zz.dtype,
crs=USE_CRS,
transform=transform,
) as dst:
dst.write(zz, 1)
from data_utils.kde import apply_kde_to_primary

primary_featurelayer.gdf["centroid"] = primary_featurelayer.gdf.geometry.centroid

coord_list = [
(x, y)
for x, y in zip(
primary_featurelayer.gdf["centroid"].x,
primary_featurelayer.gdf["centroid"].y,
)
]

primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["centroid"])

src = rasterio.open("tmp/drug_crimes.tif")
sampled_values = [x[0] for x in src.sample(coord_list)]

primary_featurelayer.gdf["drugcrime_density"] = sampled_values

percentile_breaks = list(range(101)) # [0, 1, 2, ..., 100]

drugcrime_classifier = mapclassify.Percentiles(
primary_featurelayer.gdf["drugcrime_density"], pct=percentile_breaks
)

primary_featurelayer.gdf["drugcrime_density_percentile"] = primary_featurelayer.gdf[
"drugcrime_density"
].apply(drugcrime_classifier)

def label_percentile(value):
if value == 1:
return "1st Percentile"
elif value == 2:
return "2nd Percentile"
elif value == 3:
return "3rd Percentile"
else:
return f"{value}th Percentile"

primary_featurelayer.gdf["drugcrime_density_label"] = primary_featurelayer.gdf[
"drugcrime_density_percentile"
].apply(label_percentile)

primary_featurelayer.gdf["drugcrime_density_percentile"] = primary_featurelayer.gdf[
"drugcrime_density_percentile"
].astype(float)

primary_featurelayer.gdf = primary_featurelayer.gdf.drop(
columns=["drugcrime_density"]
def drug_crimes(primary_featurelayer):
    """
    Run the shared KDE step on the primary feature layer for drug crimes.

    Thin wrapper: forwards ``primary_featurelayer`` to
    ``apply_kde_to_primary`` together with the layer name "Drug Crimes" and
    the ``DRUGCRIME_SQL_QUERY`` constant, then returns whatever that helper
    returns.

    NOTE(review): ``apply_kde_to_primary`` is defined in ``data_utils.kde``
    and is not visible here; presumably it returns the updated feature
    layer -- confirm against that module.
    """
    layer_name = "Drug Crimes"
    result = apply_kde_to_primary(
        primary_featurelayer, layer_name, DRUGCRIME_SQL_QUERY
    )
    return result

return primary_featurelayer
121 changes: 3 additions & 118 deletions data/src/data_utils/gun_crimes.py
Original file line number Diff line number Diff line change
@@ -1,123 +1,8 @@
import mapclassify
import matplotlib.pyplot as plt
import numpy as np
import rasterio
from awkde.awkde import GaussianKDE
from classes.featurelayer import FeatureLayer
from config.config import USE_CRS
from constants.services import GUNCRIME_SQL_QUERY
from rasterio.transform import Affine


def gun_crimes(primary_featurelayer):
# Initialize gun_crimes object
gun_crimes = FeatureLayer(name="Gun Crimes", carto_sql_queries=GUNCRIME_SQL_QUERY)

# Extract x, y coordinates from geometry
x = np.array([])
y = np.array([])

for geom in gun_crimes.gdf.geometry:
coords = np.array(geom.xy)
x = np.concatenate([x, coords[0]])
y = np.concatenate([y, coords[1]])

# Prepare data for KDE
X = np.array(list(zip(x, y)))

# Generate grid for plotting
grid_length = 2500

x_grid, y_grid = (
np.linspace(x.min(), x.max(), grid_length),
np.linspace(y.min(), y.max(), grid_length),
)
xx, yy = np.meshgrid(x_grid, y_grid)
grid_points = np.array([xx.ravel(), yy.ravel()]).T

# Compute adaptive KDE values
print("fitting KDE for gun crime data")
kde = GaussianKDE(glob_bw=0.1, alpha=0.999, diag_cov=True)
kde.fit(X)

z = kde.predict(grid_points)
zz = z.reshape(xx.shape)

# Calculate resolutions and min values
x_res = (x.max() - x.min()) / (len(x_grid) - 1)
y_res = (y.max() - y.min()) / (len(y_grid) - 1)
min_x, min_y = x.min(), y.min()

# Save the plot in tmp folder
plt.pcolormesh(xx, yy, zz)
plt.scatter(x, y, c="red", s=0.005)
plt.colorbar()
plt.savefig("tmp/kde.png")

# Define the affine transform
transform = Affine.translation(min_x, min_y) * Affine.scale(x_res, y_res)

# Export as raster
with rasterio.open(
"tmp/gun_crimes.tif",
"w",
driver="GTiff",
height=zz.shape[0],
width=zz.shape[1],
count=1,
dtype=zz.dtype,
crs=USE_CRS,
transform=transform,
) as dst:
dst.write(zz, 1)
from data_utils.kde import apply_kde_to_primary

primary_featurelayer.gdf["centroid"] = primary_featurelayer.gdf.geometry.centroid

coord_list = [
(x, y)
for x, y in zip(
primary_featurelayer.gdf["centroid"].x,
primary_featurelayer.gdf["centroid"].y,
)
]

primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["centroid"])

src = rasterio.open("tmp/gun_crimes.tif")
sampled_values = [x[0] for x in src.sample(coord_list)]

primary_featurelayer.gdf["guncrime_density"] = sampled_values

percentile_breaks = list(range(101)) # [0, 1, 2, ..., 100]

guncrime_classifier = mapclassify.Percentiles(
primary_featurelayer.gdf["guncrime_density"], pct=percentile_breaks
)

primary_featurelayer.gdf["guncrime_density_percentile"] = primary_featurelayer.gdf[
"guncrime_density"
].apply(guncrime_classifier)

def label_percentile(value):
if value == 1:
return "1st Percentile"
elif value == 2:
return "2nd Percentile"
elif value == 3:
return "3rd Percentile"
else:
return f"{value}th Percentile"

primary_featurelayer.gdf["guncrime_density_label"] = primary_featurelayer.gdf[
"guncrime_density_percentile"
].apply(label_percentile)

primary_featurelayer.gdf["guncrime_density_percentile"] = primary_featurelayer.gdf[
"guncrime_density_percentile"
].astype(float)

primary_featurelayer.gdf = primary_featurelayer.gdf.drop(
columns=["guncrime_density"]
)

return primary_featurelayer
def gun_crimes(primary_featurelayer):
    """
    Run the shared KDE step on the primary feature layer for gun crimes.

    Thin wrapper: forwards ``primary_featurelayer`` to
    ``apply_kde_to_primary`` together with the layer name "Gun Crimes" and
    the ``GUNCRIME_SQL_QUERY`` constant, then returns whatever that helper
    returns.

    NOTE(review): ``apply_kde_to_primary`` is defined in ``data_utils.kde``
    and is not visible here; presumably it returns the updated feature
    layer -- confirm against that module.
    """
    layer_name = "Gun Crimes"
    result = apply_kde_to_primary(
        primary_featurelayer, layer_name, GUNCRIME_SQL_QUERY
    )
    return result
Loading
Loading