-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_gee_lulc.py
122 lines (102 loc) · 6.81 KB
/
extract_gee_lulc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def extract_gee_lulc(point_filename,
product='ESA/WorldCover/v100',
id_col='Site',
start_date='2020-01-01',
end_date='2020-01-02',
bands=['Map'],
scale=10, pad=0,
dest_folder=None):
'''
This function extracts land use land cover data from Google Earth Engine for a given set of points.
Uses the function 'gee_point_extract' from the script 'gee_extract_multi_point_data.py' available at github.com/akhi9661/gee_extract_multi_point_data.
Parameters:
point_filename (str): The path to the pandas dataframe containing the points. Must contain columns named 'lat' and 'lon'.
product (str): The name of the land use land cover product to be extracted. The default is 'ESA/WorldCover/v100'. Other option is 'MODIS/061/MCD12Q1'.
id_col (str): The name of the column containing the site IDs. The default is 'Site'.
start_date (str): The start date of the time period for which the data is to be extracted. The default is '2020-01-01'. Format is 'YYYY-MM-DD'.
end_date (str): The end date of the time period for which the data is to be extracted. The default is '2020-01-02'. Format is 'YYYY-MM-DD'.
bands (list): The list of bands to be extracted. The default is ['Map']. For 'MODIS/061/MCD12Q1', the options are ['LC_Type1'] and ['LC_Type2'].
scale (int): The scale at which the data is to be extracted. The default is 10. Generally, matches the native resolution of the product.
pad (float): The padding to be used for the extraction. The default is 0.9. Units are in km.
dest_folder (str): The path to the folder where the output csv file is to be saved. The default is None. If None, the file is saved in the current working directory.
Returns:
A pandas dataframe containing the extracted data.
'''
import requests, os, pandas as pd
from io import StringIO
if dest_folder is None:
opf = os.path.join(os.getcwd(), f'{product.split("/")[-1]}_{start_date}_{end_date}.csv')
else:
opf = os.path.join(dest_folder, f'{product.split("/")[-1]}_{start_date}_{end_date}.csv')
github_link = "https://raw.githubusercontent.com/akhi9661/gee_extract_multi_point_data/main/gee_point_extract.py"
response = requests.get(github_link)
gee_extract = response.text
script_file = StringIO(gee_extract)
exec(script_file.read())
function_name = "gee_point_extract"
if product == 'ESA/WorldCover/v100':
mapping_df = pd.DataFrame({'Value': [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100],
'Description': ['Tree cover', 'Shrubland', 'Grassland', 'Cropland',
'Built-up', 'Bare / sparse vegetation', 'Snow and ice',
'Permanent water bodies', 'Herbaceous wetland', 'Mangroves',
'Moss and lichen']})
start_date = '2020-01-01'
end_date = '2020-01-02'
scale = 10
pad = 0.9
if product == 'MODIS/061/MCD12Q1':
if bands[0] == 'LC_Type1':
mapping_df = pd.DataFrame({'Value': list(range(1, 18)),
'Description': ['Evergreen Needleleaf Forests', 'Evergreen Broadleaf Forests',
'Deciduous Needleleaf Forests', 'Deciduous Broadleaf Forests',
'Mixed Forests', 'Closed Shrublands', 'Open Shrublands',
'Woody Savannas', 'Savannas', 'Grasslands', 'Permanent Wetlands',
'Croplands', 'Urban and Built-up Lands', 'Cropland/Natural Vegetation Mosaics',
'Permanent Snow and Ice', 'Barren', 'Water Bodies']})
start_date = '2020-01-01'
end_date = '2020-01-02'
scale = 500
pad = 0
if bands[0] == 'LC_Type2':
mapping_df = pd.DataFrame({'Value': list(range(0, 16)),
'Description': ['Water Bodies', 'Evergreen Needleleaf Forests', 'Evergreen Broadleaf Forests',
'Deciduous Needleleaf Forests', 'Deciduous Broadleaf Forests',
'Mixed Forests', 'Closed Shrublands', 'Open Shrublands',
'Woody Savannas', 'Savannas', 'Grasslands', 'Permanent Wetlands',
'Croplands', 'Urban and Built-up Lands', 'Cropland/Natural Vegetation Mosaics',
'Non-Vegetated Lands']})
start_date = '2020-01-01'
end_date = '2020-01-02'
scale = 500
pad = 0
if function_name in locals():
df = locals()[function_name](point_filename, product=product,
start_date=start_date,
end_date=end_date,
id_col='Site', pad=pad,
bands=bands,
scale=scale, dest_folder=None)
else:
print(f"The function '{function_name}' is not defined in the script.")
df[id_col] = df[id_col].astype('category')
df[bands[0]] = df[bands[0]].astype('category')
map_counts = df[bands[0]].value_counts()
site_map_counts = df.groupby(id_col)[bands[0]].count()
new_df = pd.DataFrame(columns=[id_col, 'lat', 'lon', bands[0], 'Class'])
for site in df[id_col].unique():
site_data = df[df[id_col] == site]
site_data = df[df[id_col] == site]
site_map_counts = site_data[bands[0]].value_counts()
majority_value = site_map_counts.idxmax()
site_class_data = pd.DataFrame({id_col: [site] * len(site_map_counts),
'lat': site_data['latitude'].values[0], # Append latitude
'lon': site_data['longitude'].values[0], # Append longitude
bands[0]: site_map_counts.index,
'Class': [majority_value] * len(site_map_counts)})
new_df = new_df.append(site_class_data, ignore_index=True)
new_df = new_df.drop_duplicates(subset=['Site'])
merged_df = new_df.merge(mapping_df, left_on='Class', right_on='Value', how='left')
merged_df['Class Name'] = merged_df['Description'].where(pd.notnull(merged_df['Description']), '')
merged_df.drop(['Value', 'Description'], axis=1, inplace=True)
merged_df.to_csv(opf, index=False)
return merged_df