-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
remove arcgis python library dependency; just use requests
- Loading branch information
1 parent
79c5039
commit b6d82a0
Showing 3 changed files with 78 additions and 35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
FROM python:3.9-bullseye | ||
|
||
RUN pip install --upgrade pip && pip install arcgis==2.0.* | ||
RUN python3 -m pip install --upgrade pip && pip install requests==2.* pandas==2.* --no-cache-dir | ||
|
||
COPY getdata.py ./ | ||
RUN chmod a+x /getdata.py | ||
|
||
ENTRYPOINT ["python", "./getdata.py"] | ||
ENTRYPOINT ["python3", "./getdata.py"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,60 +1,103 @@ | ||
import argparse | ||
import arcgis | ||
import requests | ||
import pandas as pd | ||
import os | ||
import logging | ||
|
||
# Argument parser | ||
def parse_args(): | ||
parser = argparse.ArgumentParser( | ||
description='Extract a feature set from an ArcGIS Online item') | ||
description='Extract a feature set from an ArcGIS Online URL') | ||
|
||
parser.add_argument( | ||
'item_id', | ||
help = "ArcGIS Online item id", | ||
'--url', | ||
help = "ArcGIS Online url for item", | ||
required = True, | ||
type = str) | ||
|
||
parser.add_argument( | ||
'output', | ||
'--output', | ||
help = "Filename to write output to", | ||
required = True, | ||
type = str) | ||
|
||
return parser.parse_args() | ||
|
||
|
||
# Main function to extract and output data from ArcGIS Online | ||
def main(item_id, output): | ||
if os.getenv('ARCGIS_USER') is not None: | ||
# Main function to extract and output feature data (e.g. PHO WTISEN) from an ArcGIS Online feature service URL | ||
def main(features_url, output): | ||
|
||
# Load credentials and remove environment variables | ||
username = os.getenv('ARCGIS_USER') | ||
if username is not None: | ||
logging.info("ARCGIS_USER environment variable found") | ||
os.environ.pop('ARCGIS_USER', None) | ||
else: | ||
raise ValueError("ARCGIS_USER environment variable not found.") | ||
|
||
if os.getenv('ARCGIS_PASSWORD') is not None: | ||
|
||
password = os.getenv('ARCGIS_PASSWORD') | ||
if password is not None: | ||
logging.info("ARCGIS_PASSWORD environment variable found") | ||
os.environ.pop('ARCGIS_PASSWORD', None) | ||
else: | ||
raise ValueError("ARCGIS_PASSWORD environment variable not found.") | ||
|
||
logging.info("Connecting to ArcGIS Online") | ||
gis = arcgis.gis.GIS( | ||
username = os.getenv('ARCGIS_USER'), | ||
password = os.getenv('ARCGIS_PASSWORD'), | ||
verify_cert = False) | ||
|
||
logging.info("Logged in to ArcGIS Online as " + str(gis.properties.user.username)) | ||
|
||
logging.info(f"Retrieving {item_id}") | ||
item = gis.content.get(item_id) | ||
|
||
logging.info("Extracting feature set") | ||
feature_set = item.layers[0].query() | ||
|
||
logging.info(f"Outputting feature set to {output}") | ||
feature_set.sdf.to_csv(output) | ||
logging.info("Generating ArcGIS API token") | ||
token = requests.post( | ||
url = 'https://www.arcgis.com/sharing/rest/generateToken', | ||
data = { | ||
'f': 'json', | ||
'username': username, | ||
'password': password, | ||
'referer': 'https://www.arcgis.com', | ||
'expiration': 60, # minutes | ||
}).json()['token'] | ||
|
||
# Set up pagination | ||
batch_size = 1000 | ||
offset = 0 | ||
all_records = [] | ||
continue_pagination = True | ||
|
||
logging.info(f"Retrieving data in batch sizes of {batch_size} from {features_url} in JSON format") | ||
|
||
while continue_pagination: | ||
logging.info(f"Retrieving data batch {(offset//batch_size) + 1}") | ||
|
||
# Fetch batch of records | ||
response = requests.get( | ||
url = features_url, | ||
params= { | ||
'f': 'json', | ||
'where': '1=1', | ||
'outFields': '*', | ||
'resultOffset': offset, | ||
'resultRecordCount': batch_size, | ||
'token': token | ||
}).json() | ||
|
||
# Add records to all_records list | ||
all_records.extend(response.get('features', [])) | ||
|
||
# Check if exceededTransferLimit is true to determine if pagination continues | ||
continue_pagination = response.get('exceededTransferLimit', False) | ||
|
||
# Increment offset | ||
offset += batch_size | ||
|
||
logging.info("All data retrieved") | ||
logging.info("Converting JSON to tabular format") | ||
features = pd.DataFrame([record['attributes'] for record in all_records]) | ||
|
||
rows, columns = features.shape | ||
logging.info(f"Data contains {rows} rows and {columns} columns") | ||
|
||
logging.info(f"Exporting data as {output}") | ||
features.to_csv(output, index = False) | ||
|
||
|
||
if __name__ == '__main__': | ||
logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO) | ||
|
||
# Parse and unpack keyword arguments | ||
main(**vars(parse_args())) | ||
|
||
logging.info("Done") |