Skip to content

Commit

Permalink
Merge pull request #95 from urbanbigdatacentre/ideamaps_manuella
Browse files Browse the repository at this point in the history
changes to line 2 of GB model
  • Loading branch information
andy-clarke-uofg authored Sep 13, 2024
2 parents d8ca44c + 7cbbcf0 commit 3c3a06c
Showing 1 changed file with 57 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,31 @@
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "102a4697-6fc7-41ad-a3a0-72dd612ce104",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10-Fold Cross-Validation Scores for GBM: [0.69230769 0.73076923 0.84615385 0.80769231 0.65384615 0.88461538\n",
" 0.80769231 0.61538462 0.6 0.56 ]\n",
"Mean CV Score: 0.7198461538461538\n",
"Accuracy on the Test Set: 0.7846153846153846\n",
"Missing values in 'MI_cat': 325031\n",
"Missing Count: 325031, Predicted Count: 325031\n",
"Predictions saved to Lagos_MIcat_with_predictions_GB.csv\n"
"10-Fold Cross-Validation Scores for GBM: [0.75 0.73076923 0.80769231 0.75 0.78846154 0.69230769\n",
" 0.80392157 0.56862745 0.68627451 0.66666667]\n",
"Mean CV Score: 0.72447209653092\n",
"Accuracy on the Test Set: 0.7364341085271318\n",
"Missing values in 'MI_cat': 109818\n",
"Missing Count: 109818, Predicted Count: 109818\n",
"Predictions saved to Nairobi_MIcat_with_predictions_GB.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1355/1048240529.py:75: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
"/tmp/ipykernel_462/160971822.py:77: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
"\n",
" ingestion_df['latitude'] = City_MIcat.geometry.centroid.y\n",
"/tmp/ipykernel_1355/1048240529.py:76: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
"/tmp/ipykernel_462/160971822.py:78: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
"\n",
" ingestion_df['longitude'] = City_MIcat.geometry.centroid.x\n"
]
Expand All @@ -35,7 +35,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Ingestion data saved to Lagos_MIcat_ingestion_GB.csv\n"
"Ingestion data saved to Nairobi_MIcat_ingestion_GB.csv\n"
]
}
],
Expand All @@ -45,7 +45,9 @@
"import geopandas as gpd\n",
"from sklearn.ensemble import GradientBoostingClassifier\n",
"from sklearn.model_selection import train_test_split, cross_val_score\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.metrics import accuracy_score, confusion_matrix\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Read the GeoPackage file\n",
"City_MIcat = gpd.read_file('Nairobi_Reference_MIcat.gpkg')\n",
Expand Down Expand Up @@ -143,7 +145,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"id": "9c7b3703-07cd-4b58-b234-9f9ea14479fb",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -173,6 +175,47 @@
}
],
"source": [
"# Read the GeoPackage file\n",
"City_MIcat = gpd.read_file('Nairobi_Reference_MIcat.gpkg')\n",
"\n",
"# Convert from Mollweide (ESRI:54009) to WGS84 (EPSG:4326)\n",
"City_MIcat = City_MIcat.to_crs(epsg=4326)\n",
"\n",
"# Convert to a pandas DataFrame without the geometry column\n",
"City_MIcat_df = pd.DataFrame(City_MIcat.drop(columns='geometry'))\n",
"\n",
"# Replace NaN with 0 in population columns.\n",
"# The filled columns are assigned back explicitly: calling\n",
"# fillna(..., inplace=True) on a column selection is deprecated chained\n",
"# assignment and leaves the DataFrame unchanged under pandas Copy-on-Write,\n",
"# which would let NaN values reach the model.\n",
"population_columns = ['Pop_stddev', 'Pop_mean']\n",
"City_MIcat_df[population_columns] = City_MIcat_df[population_columns].fillna(0)\n",
"\n",
"# Map categorical values to integers.\n",
"# Any value that is not a key of label_map becomes NaN and therefore ends up\n",
"# in the prediction set below.\n",
"label_map = {'Low': 0, 'Med': 1, 'High': 2}\n",
"City_MIcat_df['MI_cat'] = City_MIcat_df['MI_cat'].map(label_map)\n",
"\n",
"# Separate the data into training and prediction sets\n",
"train_df = City_MIcat_df[City_MIcat_df['MI_cat'].notna()] # Data with MI_cat values (0, 1, 2)\n",
"predict_df = City_MIcat_df[City_MIcat_df['MI_cat'].isna()] # Data with MI_cat as NaN\n",
"\n",
"# Define feature columns\n",
"feature_columns = ['SDS_bin', 'roadlength', 'Pop_mean', 'Pop_stddev', 'ISL_bin', 'Intersections', 'Distance_mean']\n",
"\n",
"# Split the data into training and test sets (80/20, fixed seed for reproducibility).\n",
"# The labels are cast to int because the mapped column is float while it still holds NaN.\n",
"x_train, x_test, y_train, y_test = train_test_split(train_df[feature_columns], train_df['MI_cat'].astype(int), test_size=0.2, random_state=42)\n",
"\n",
"# Fit the Gradient Boosting model\n",
"gbm = GradientBoostingClassifier(n_estimators=1000, learning_rate=0.01, max_depth=3, random_state=42)\n",
"gbm.fit(x_train, y_train)\n",
"\n",
"# Perform 10-fold cross-validation on the training split.\n",
"# cross_val_score fits clones of the estimator, so the fitted gbm above is not modified.\n",
"cv_scores_gbm = cross_val_score(gbm, x_train, y_train, cv=10, n_jobs=-1)\n",
"print(\"10-Fold Cross-Validation Scores for GBM: \", cv_scores_gbm)\n",
"print(\"Mean CV Score: \", np.mean(cv_scores_gbm))\n",
"\n",
"# Evaluate on the held-out test set\n",
"y_pred = gbm.predict(x_test)\n",
"test_accuracy = accuracy_score(y_test, y_pred)\n",
"print(\"Accuracy on the Test Set: \", test_accuracy)\n",
"\n",
"# Confusion matrix\n",
"conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1, 2])\n",
"\n",
Expand All @@ -190,7 +233,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "163b133c-e696-4338-a007-a0bfc865d723",
"metadata": {},
"outputs": [
Expand Down

0 comments on commit 3c3a06c

Please sign in to comment.