-
Notifications
You must be signed in to change notification settings - Fork 345
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add EDDMapS dataset * Mypy hack * Test fix
- Loading branch information
1 parent
369b361
commit 827985a
Showing
6 changed files
with
291 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
|
||
import pandas as pd | ||
|
||
# Builds a small table of placeholder records and writes it to ``mappings.csv``
# in the current working directory.
filename = "mappings.csv"

# Number of records to generate. Must be at least 1, because several columns
# below are built as "one special record + (size - 1) regular records".
size = 3

# One entry per CSV column, in output order; every list has exactly ``size``
# elements. Empty strings are written out as empty CSV fields.
data = {
    "gbifID": [""] * size,
    "decimalLatitude": [41.881832] * size,
    # The first record has an empty longitude; the rest share one value.
    "decimalLongitude": [""] + [-87.623177] * (size - 1),
    "objectid": [""] * size,
    "reporter": [""] * size,
    "RecOwner": [""] * size,
    "SciName": ["Homo sapiens"] * size,
    "ComName": ["human"] * size,
    "Nativity": ["Native"] * size,
    "OccStatus": ["Detected"] * size,
    "Status": ["Positive"] * size,
    # Only the last record has an observation date. Derived from ``size`` (like
    # every other column) so that changing ``size`` keeps the lengths aligned.
    "ObsDate": [""] * (size - 1) + ["05-07-22"],
    "DateEnt": ["05-07-22"] * size,
    "DateUp": ["05-07-22"] * size,
    "Location": ["Chicago, Illinois, United States"] * size,
    "Latitude": [41.881832] * size,
    # Same pattern as decimalLongitude: the first record has no longitude.
    "Longitude": [""] + [-87.623177] * (size - 1),
    "Datum": ["WGS84"] * size,
    "Method": [""] * size,
    "CoordAcc": [""] * size,
    "DataType": [""] * size,
    "Centroid": [""] * size,
    "Abundance": [""] * size,
    "InfestAcre": [""] * size,
    "GrossAcre": [""] * size,
    "Percentcov": [""] * size,
    "Density": [""] * size,
    "Quantity": [""] * size,
    "QuantityU": [""] * size,
    "APPXQuant": [""] * size,
    "NumCollect": [""] * size,
    "Smallest": [""] * size,
    "Largest": [""] * size,
    "Incidence": [""] * size,
    "Severity": [""] * size,
    "Host": [""] * size,
    "Host_Name": [""] * size,
    "HostPheno": [""] * size,
    "HostDamage": [""] * size,
    "ManageStat": ["Unknown"] * size,
    "PopStat": [""] * size,
    "Habitat": [""] * size,
    "LocalOwner": [""] * size,
    "Site": [""] * size,
    "RecBasis": [""] * size,
    "Museum": [""] * size,
    "MuseumRec": [""] * size,
    "Voucher": [""] * size,
    "ObsIDer": [""] * size,
    "CollectTme": [""] * size,
    "UUID": [""] * size,
    "OrgSrcID": [""] * size,
    "OrigName": ["Homo sapiens"] * size,
    "RecSrcTyp": ["Bulk Data"] * size,
    "Surveyor": [""] * size,
    "DateAcc": [""] * size,
    "VisitType": [""] * size,
    "DataMthd": [""] * size,
    "TrapType": [""] * size,
    "NumTraps": [""] * size,
    "TargetName": [""] * size,
    "TargetCnt": [""] * size,
    "TargetRnge": [""] * size,
    "Phenology": [""] * size,
    "LifeStatus": [""] * size,
    "Sex": [""] * size,
    "PID": [""] * size,
    "WaterName": [""] * size,
    "WaterType": [""] * size,
    "Substrate": [""] * size,
    "TreatArea": [""] * size,
    "PlantTreat": [""] * size,
    "TreatComm": [""] * size,
    "Reference": [""] * size,
    "Locality": [""] * size,
    "Comments": [""] * size,
    "ReviewDate": ["05-07-22"] * size,
    "Reviewer": ["Charles Darwin"] * size,
    "VerifyMthd": ["Bulk Verified"] * size,
    "Verified": ["Verified"] * size,
    "IDCred": ["Credible"] * size,
    "ReviewComm": [""] * size,
}

# Write the table without the DataFrame's integer index column.
df = pd.DataFrame(data)
df.to_csv(filename, index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
gbifID,decimalLatitude,decimalLongitude,objectid,reporter,RecOwner,SciName,ComName,Nativity,OccStatus,Status,ObsDate,DateEnt,DateUp,Location,Latitude,Longitude,Datum,Method,CoordAcc,DataType,Centroid,Abundance,InfestAcre,GrossAcre,Percentcov,Density,Quantity,QuantityU,APPXQuant,NumCollect,Smallest,Largest,Incidence,Severity,Host,Host_Name,HostPheno,HostDamage,ManageStat,PopStat,Habitat,LocalOwner,Site,RecBasis,Museum,MuseumRec,Voucher,ObsIDer,CollectTme,UUID,OrgSrcID,OrigName,RecSrcTyp,Surveyor,DateAcc,VisitType,DataMthd,TrapType,NumTraps,TargetName,TargetCnt,TargetRnge,Phenology,LifeStatus,Sex,PID,WaterName,WaterType,Substrate,TreatArea,PlantTreat,TreatComm,Reference,Locality,Comments,ReviewDate,Reviewer,VerifyMthd,Verified,IDCred,ReviewComm | ||
,41.881832,,,,,Homo sapiens,human,Native,Detected,Positive,,05-07-22,05-07-22,"Chicago, Illinois, United States",41.881832,,WGS84,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,Homo sapiens,Bulk Data,,,,,,,,,,,,,,,,,,,,,,,05-07-22,Charles Darwin,Bulk Verified,Verified,Credible, | ||
,41.881832,-87.623177,,,,Homo sapiens,human,Native,Detected,Positive,,05-07-22,05-07-22,"Chicago, Illinois, United States",41.881832,-87.623177,WGS84,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,Homo sapiens,Bulk Data,,,,,,,,,,,,,,,,,,,,,,,05-07-22,Charles Darwin,Bulk Verified,Verified,Credible, | ||
,41.881832,-87.623177,,,,Homo sapiens,human,Native,Detected,Positive,05-07-22,05-07-22,05-07-22,"Chicago, Illinois, United States",41.881832,-87.623177,WGS84,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,Homo sapiens,Bulk Data,,,,,,,,,,,,,,,,,,,,,,,05-07-22,Charles Darwin,Bulk Verified,Verified,Credible, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
|
||
import builtins | ||
import os | ||
from pathlib import Path | ||
from typing import Any | ||
|
||
import pytest | ||
from _pytest.monkeypatch import MonkeyPatch | ||
|
||
from torchgeo.datasets import BoundingBox, EDDMapS, IntersectionDataset, UnionDataset | ||
|
||
pytest.importorskip("pandas", minversion="0.23.2") | ||
|
||
|
||
class TestEDDMapS:
    """Tests for the ``EDDMapS`` dataset class."""

    @pytest.fixture(scope="class")
    def dataset(self) -> EDDMapS:
        """Return an ``EDDMapS`` built from ``tests/data/eddmaps``, shared class-wide."""
        return EDDMapS(os.path.join("tests", "data", "eddmaps"))

    def test_getitem(self, dataset: EDDMapS) -> None:
        """Indexing with the dataset's own bounds returns a dict."""
        sample = dataset[dataset.bounds]
        assert isinstance(sample, dict)

    def test_len(self, dataset: EDDMapS) -> None:
        """The dataset built from the test data reports a length of 2."""
        assert len(dataset) == 2

    def test_and(self, dataset: EDDMapS) -> None:
        """``&`` on two datasets produces an ``IntersectionDataset``."""
        combined = dataset & dataset
        assert isinstance(combined, IntersectionDataset)

    def test_or(self, dataset: EDDMapS) -> None:
        """``|`` on two datasets produces a ``UnionDataset``."""
        combined = dataset | dataset
        assert isinstance(combined, UnionDataset)

    def test_no_data(self, tmp_path: Path) -> None:
        """Constructing from an empty directory raises ``FileNotFoundError``."""
        with pytest.raises(FileNotFoundError, match="Dataset not found"):
            EDDMapS(str(tmp_path))

    @pytest.fixture
    def mock_missing_module(self, monkeypatch: MonkeyPatch) -> None:
        """Patch ``builtins.__import__`` so that importing pandas fails."""
        real_import = builtins.__import__

        def fake_import(name: str, *args: Any, **kwargs: Any) -> Any:
            # Every module except pandas is delegated to the real importer.
            if name != "pandas":
                return real_import(name, *args, **kwargs)
            raise ImportError()

        monkeypatch.setattr(builtins, "__import__", fake_import)

    def test_mock_missing_module(
        self, dataset: EDDMapS, mock_missing_module: None
    ) -> None:
        """With pandas unimportable, construction raises a descriptive ``ImportError``."""
        with pytest.raises(
            ImportError,
            match="pandas is not installed and is required to use this dataset",
        ):
            EDDMapS(dataset.root)

    def test_invalid_query(self, dataset: EDDMapS) -> None:
        """An all-zero bounding box query raises ``IndexError``."""
        zero_query = BoundingBox(0, 0, 0, 0, 0, 0)
        with pytest.raises(
            IndexError, match="query: .* not found in index with bounds:"
        ):
            dataset[zero_query]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
|
||
"""Dataset for EDDMapS.""" | ||
|
||
import os | ||
import sys | ||
from typing import Any, Dict | ||
|
||
import numpy as np | ||
from rasterio.crs import CRS | ||
|
||
from .geo import GeoDataset | ||
from .utils import BoundingBox, disambiguate_timestamp | ||
|
||
|
||
class EDDMapS(GeoDataset):
    """Dataset for EDDMapS.

    `EDDMapS <https://www.eddmaps.org/>`_, Early Detection and Distribution Mapping
    System, is a web-based mapping system for documenting invasive species and pest
    distribution. Launched in 2005 by the Center for Invasive Species and Ecosystem
    Health at the University of Georgia, it was originally designed as a tool for
    state Exotic Pest Plant Councils to develop more complete distribution data of
    invasive species. Since then, the program has expanded to include the entire US
    and Canada as well as to document certain native pest species.

    EDDMapS query results can be downloaded in CSV, KML, or Shapefile format. This
    dataset currently only supports CSV files.

    If you use an EDDMapS dataset in your research, please cite it like so:

    * EDDMapS. *YEAR*. Early Detection & Distribution Mapping System. The University of
      Georgia - Center for Invasive Species and Ecosystem Health. Available online at
      http://www.eddmaps.org/; last accessed *DATE*.

    .. note::
       This dataset requires the following additional library to be installed:

       * `pandas <https://pypi.org/project/pandas/>`_ to load CSV files

    .. versionadded:: 0.3
    """

    res = 0
    _crs = CRS.from_epsg(4326)  # Lat/Lon

    def __init__(self, root: str = "data") -> None:
        """Initialize a new Dataset instance.

        Args:
            root: root directory where dataset can be found

        Raises:
            FileNotFoundError: if no files are found in ``root``
            ImportError: if pandas is not installed
        """
        super().__init__()

        self.root = root

        # The file check runs before the pandas import, so a missing dataset is
        # reported as FileNotFoundError even when pandas is unavailable.
        filepath = os.path.join(root, "mappings.csv")
        if not os.path.exists(filepath):
            raise FileNotFoundError(f"Dataset not found in `root={self.root}`")

        try:
            import pandas as pd  # noqa: F401
        except ImportError as err:
            raise ImportError(
                "pandas is not installed and is required to use this dataset"
            ) from err

        # Read CSV file. ``read_csv`` ignores the order of ``usecols`` and returns
        # the columns in file order, so reindex explicitly: the loop below unpacks
        # each row positionally as (date, latitude, longitude).
        columns = ["ObsDate", "Latitude", "Longitude"]
        data = pd.read_csv(filepath, engine="c", usecols=columns)[columns]

        # Convert from pandas DataFrame to rtree Index
        i = 0
        for date, y, x in data.itertuples(index=False, name=None):
            # Skip rows without lat/lon
            if np.isnan(y) or np.isnan(x):
                continue

            if not pd.isna(date):
                mint, maxt = disambiguate_timestamp(date, "%m-%d-%y")
            else:
                # Undated records get the widest time range: 0 .. sys.maxsize.
                mint, maxt = 0, sys.maxsize

            # A record is a single point, so min and max coincide on each
            # spatial axis: (minx, maxx, miny, maxy, mint, maxt).
            coords = (x, x, y, y, mint, maxt)
            self.index.insert(i, coords)
            # ``i`` counts inserted entries only; skipped rows do not consume an id.
            i += 1

    def __getitem__(self, query: BoundingBox) -> Dict[str, Any]:
        """Retrieve metadata indexed by query.

        Args:
            query: (minx, maxx, miny, maxy, mint, maxt) coordinates to index

        Returns:
            sample of metadata at that index: a dict with the dataset ``crs`` and a
            ``bbox`` list holding one bounding box per index entry hit by the query

        Raises:
            IndexError: if query is not found in the index
        """
        hits = self.index.intersection(tuple(query), objects=True)
        # NOTE(review): rtree documents ``Item.bbox`` as the interleaved form
        # (all mins, then all maxes), which differs from the query's
        # (minx, maxx, miny, maxy, mint, maxt) layout; ``Item.bounds`` is the
        # non-interleaved form. Confirm which ordering callers expect.
        bboxes = [hit.bbox for hit in hits]

        if not bboxes:
            raise IndexError(
                f"query: {query} not found in index with bounds: {self.bounds}"
            )

        sample = {"crs": self.crs, "bbox": bboxes}

        return sample