-
Notifications
You must be signed in to change notification settings - Fork 326
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* populate index attempt * added tests * correct plot method * fix test * fix documentation * fix docs * name changes * lazy import pandas and Any instead of Tensor * requested changes * mypy fixes * Close plot filehandles Co-authored-by: Adam J. Stewart <ajstewart426@gmail.com>
- Loading branch information
1 parent
4c221df
commit 06ec364
Showing
7 changed files
with
739 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
|
||
import csv | ||
import gzip | ||
import hashlib | ||
import json | ||
import os | ||
import random | ||
import shutil | ||
|
||
import numpy as np | ||
from shapely.geometry import Polygon | ||
|
||
# Edge length of the square test tile written to tiles.geojson; the fake
# building footprints are scaled from it (their sides are SIZE / 10).
SIZE = 0.05

# Seed both RNGs so any random draws are reproducible.
# NOTE(review): no visible code in this script draws random numbers - confirm
# whether the seeding is still needed.
np.random.seed(0)
random.seed(0)
|
||
|
||
def create_meta_data_file(zipfilename):
    """Build the GeoJSON tile index describing the single fake tile.

    Args:
        zipfilename: name of the gzipped buildings CSV; it is appended to the
            ``polygons_s2_level_4_gzip/`` prefix to form the tile URL

    Returns:
        a ``FeatureCollection`` dict holding one square ``Polygon`` feature
        whose corners run from ``(0.0, 0.0)`` to ``(SIZE, SIZE)``
    """
    # Closed ring: the first vertex is repeated as the last one.
    ring = [[0.0, 0.0], [0.0, SIZE], [SIZE, SIZE], [SIZE, 0.0], [0.0, 0.0]]
    tile_feature = {
        "type": "Feature",
        "geometry": {"type": "Polygon", "coordinates": [ring]},
        "properties": {
            "tile_id": "025",
            "tile_url": "polygons_s2_level_4_gzip/{}".format(zipfilename),
            "size_mb": 0.2,
        },
    }
    return {"type": "FeatureCollection", "features": [tile_feature]}
|
||
|
||
def create_csv_data_row(lat, long):
    """Create one buildings CSV record for a small square footprint.

    The footprint is an axis-aligned square centred on ``(long, lat)`` whose
    sides are one tenth of ``SIZE``.

    Args:
        lat: latitude of the footprint centre
        long: longitude of the footprint centre

    Returns:
        dict with the keys ``latitude``, ``longitude``, ``area_in_meters``,
        ``confidence``, ``geometry`` (the footprint as WKT) and
        ``full_plus_code``
    """
    width, height = SIZE / 10, SIZE / 10
    minx = long - 0.5 * width
    maxx = long + 0.5 * width
    miny = lat - 0.5 * height
    # The upper edge must ADD the half height. Subtracting it makes
    # maxy == miny and collapses the footprint to a zero-area polygon.
    # NOTE: regenerating the archive with this geometry changes its bytes, so
    # any checksum recorded for the archive has to be refreshed as well.
    maxy = lat + 0.5 * height
    coordinates = [(minx, miny), (minx, maxy), (maxx, maxy), (maxx, miny), (minx, miny)]
    polygon = Polygon(coordinates)

    data_row = {
        "latitude": lat,
        "longitude": long,
        "area_in_meters": 1.0,
        "confidence": 1.0,
        "geometry": polygon.wkt,
        "full_plus_code": "ABC",
    }

    return data_row
|
||
|
||
def create_buildings_data():
    """Return the rows of the fake buildings CSV.

    Returns:
        a list of two row dicts produced by ``create_csv_data_row``: one
        centred a quarter of ``SIZE`` in from the tile origin along both
        axes, the other a quarter of ``SIZE`` in from the opposite corner
    """
    near = SIZE / 4
    far = SIZE - near
    return [create_csv_data_row(near, near), create_csv_data_row(far, far)]
|
||
|
||
if __name__ == "__main__":
    csvname = "000_buildings.csv"
    zipfilename = csvname + ".gz"

    # Write the GeoJSON tile index that points at the gzipped CSV.
    meta_data = create_meta_data_file(zipfilename)
    with open("tiles.geojson", "w") as fp:
        json.dump(meta_data, fp)

    # Write the building rows to a temporary CSV. newline="" hands control of
    # line endings to the csv module, as its documentation requires; without
    # it, platforms that translate "\n" on write emit "\r\r\n" row endings.
    buildings_data = create_buildings_data()
    keys = buildings_data[0].keys()
    with open(csvname, "w", newline="") as f:
        w = csv.DictWriter(f, keys)
        w.writeheader()
        w.writerows(buildings_data)

    # Compress the CSV into the .gz archive used as the test fixture.
    with open(csvname, "rb") as f_in, gzip.open(zipfilename, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)

    # Print the archive checksum.
    # NOTE(review): per the gzip docs the header stores a modification time,
    # so this digest presumably differs between runs - confirm before pinning.
    with open(zipfilename, "rb") as f:
        md5 = hashlib.md5(f.read()).hexdigest()
        print(f"{zipfilename}: {md5}")

    # The uncompressed CSV was only an intermediate artefact.
    os.remove(csvname)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[[0.0, 0.0], [0.0, 0.05], [0.05, 0.05], [0.05, 0.0], [0.0, 0.0]]]}, "properties": {"tile_id": "025", "tile_url": "polygons_s2_level_4_gzip/000_buildings.csv.gz", "size_mb": 0.2}}]} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
|
||
import builtins | ||
import json | ||
import os | ||
import shutil | ||
from pathlib import Path | ||
from typing import Any, Generator | ||
|
||
import matplotlib.pyplot as plt | ||
import pandas as pd | ||
import pytest | ||
import torch | ||
import torch.nn as nn | ||
from _pytest.fixtures import SubRequest | ||
from _pytest.monkeypatch import MonkeyPatch | ||
from rasterio.crs import CRS | ||
|
||
from torchgeo.datasets import ( | ||
BoundingBox, | ||
IntersectionDataset, | ||
OpenBuildings, | ||
UnionDataset, | ||
) | ||
|
||
# Skip this module when the installed pandas is older than the minimum version.
# NOTE(review): pandas is already imported unconditionally at the top of this
# module, so a missing pandas install fails at that import before this skip
# can run; only the minversion check can still take effect here.
pytest.importorskip("pandas", minversion="0.19.1")
|
||
|
||
class TestOpenBuildings:
    """Test suite for the ``OpenBuildings`` dataset, run against a tiny fixture."""

    @pytest.fixture
    def dataset(
        self, monkeypatch: Generator[MonkeyPatch, None, None], tmp_path: Path
    ) -> OpenBuildings:
        """Return an ``OpenBuildings`` dataset rooted at ``tmp_path``.

        Copies the fixture tile index and buildings archive into ``tmp_path``
        and patches ``OpenBuildings.md5s`` with a single entry for the archive.
        """
        root = str(tmp_path)
        fixture_dir = os.path.join("tests", "data", "openbuildings")
        for filename in ("tiles.geojson", "000_buildings.csv.gz"):
            shutil.copy(os.path.join(fixture_dir, filename), root)

        monkeypatch.setattr(  # type: ignore[attr-defined]
            OpenBuildings,
            "md5s",
            {"000_buildings.csv.gz": "20aeeec9d45a0ce4d772a26e0bcbc25f"},
        )
        transforms = nn.Identity()  # type: ignore[attr-defined]
        return OpenBuildings(root=root, transforms=transforms)

    @pytest.fixture(params=["pandas"])
    def mock_missing_module(
        self, monkeypatch: Generator[MonkeyPatch, None, None], request: SubRequest
    ) -> str:
        """Make importing the parametrized package fail; return its name.

        ``builtins.__import__`` is replaced by a wrapper that raises
        ``ImportError`` for that package and defers to the real import
        otherwise.
        """
        real_import = builtins.__import__
        blocked = str(request.param)

        def mocked_import(name: str, *args: Any, **kwargs: Any) -> Any:
            if name != blocked:
                return real_import(name, *args, **kwargs)
            raise ImportError()

        monkeypatch.setattr(  # type: ignore[attr-defined]
            builtins, "__import__", mocked_import
        )
        return blocked

    def test_mock_missing_module(
        self, dataset: OpenBuildings, mock_missing_module: str
    ) -> None:
        """Constructing the dataset raises ``ImportError`` when pandas is missing."""
        pkg = mock_missing_module

        with pytest.raises(
            ImportError,
            match=f"{pkg} is not installed and is required to use this dataset",
        ):
            OpenBuildings(root=dataset.root)

    def test_no_shapes_to_rasterize(
        self, dataset: OpenBuildings, tmp_path: Path
    ) -> None:
        """A query still yields a sample when the buildings CSV has no rows."""
        # Overwrite the archive with a header-only CSV (same columns, no rows).
        archive = os.path.join(tmp_path, "000_buildings.csv.gz")
        columns = pd.read_csv(archive).columns
        pd.DataFrame(columns=columns).to_csv(archive, compression="gzip")

        sample = dataset[dataset.bounds]
        assert isinstance(sample, dict)
        assert isinstance(sample["crs"], CRS)
        assert isinstance(sample["mask"], torch.Tensor)

    def test_no_building_data_found(self, tmp_path: Path) -> None:
        """A root holding only the tile index raises ``RuntimeError``."""
        empty_root = os.path.join(tmp_path, "empty")
        os.makedirs(empty_root)
        tiles = os.path.join("tests", "data", "openbuildings", "tiles.geojson")
        shutil.copy(tiles, empty_root)

        expected = "have manually downloaded the dataset as suggested "
        with pytest.raises(RuntimeError, match=expected):
            OpenBuildings(root=empty_root)

    def test_corrupted(self, dataset: OpenBuildings, tmp_path: Path) -> None:
        """With ``checksum=True`` an overwritten archive raises ``RuntimeError``."""
        archive = os.path.join(tmp_path, "000_buildings.csv.gz")
        with open(archive, "w") as f:
            f.write("bad")

        with pytest.raises(RuntimeError, match="Dataset found, but corrupted."):
            OpenBuildings(dataset.root, checksum=True)

    def test_no_meta_data_found(self, tmp_path: Path) -> None:
        """An empty root directory raises ``FileNotFoundError``."""
        empty_root = os.path.join(tmp_path, "empty")
        os.makedirs(empty_root)

        with pytest.raises(FileNotFoundError, match="Meta data file"):
            OpenBuildings(root=empty_root)

    def test_nothing_in_index(self, dataset: OpenBuildings, tmp_path: Path) -> None:
        """A tile index whose URL matches no file raises ``FileNotFoundError``."""
        # Point the tile's 'tile_url' at a file name that does not exist.
        tiles_path = os.path.join(tmp_path, "tiles.geojson")
        with open(tiles_path) as f:
            meta = json.load(f)
        meta["features"][0]["properties"]["tile_url"] = "mismatch.csv.gz"

        with open(tiles_path, "w") as f:
            json.dump(meta, f)

        with pytest.raises(FileNotFoundError, match="data was found in"):
            OpenBuildings(dataset.root)

    def test_getitem(self, dataset: OpenBuildings) -> None:
        """Indexing by the dataset bounds returns a dict with ``crs`` and ``mask``."""
        sample = dataset[dataset.bounds]
        assert isinstance(sample, dict)
        assert isinstance(sample["crs"], CRS)
        assert isinstance(sample["mask"], torch.Tensor)

    def test_and(self, dataset: OpenBuildings) -> None:
        """``&`` on two datasets produces an ``IntersectionDataset``."""
        combined = dataset & dataset
        assert isinstance(combined, IntersectionDataset)

    def test_or(self, dataset: OpenBuildings) -> None:
        """``|`` on two datasets produces a ``UnionDataset``."""
        combined = dataset | dataset
        assert isinstance(combined, UnionDataset)

    def test_invalid_query(self, dataset: OpenBuildings) -> None:
        """The out-of-range query below raises ``IndexError``."""
        query = BoundingBox(100, 100, 100, 100, 0, 0)
        expected = "query: .* not found in index with bounds:"
        with pytest.raises(IndexError, match=expected):
            dataset[query]

    def test_plot(self, dataset: OpenBuildings) -> None:
        """``plot`` runs on a sample; the figure is closed afterwards."""
        sample = dataset[dataset.bounds]
        dataset.plot(sample, suptitle="test")
        plt.close()

    def test_plot_prediction(self, dataset: OpenBuildings) -> None:
        """``plot`` runs on a sample that also carries a ``prediction`` entry."""
        sample = dataset[dataset.bounds]
        sample["prediction"] = sample["mask"].clone()
        dataset.plot(sample, suptitle="Prediction")
        plt.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.