Skip to content

Commit

Permalink
use github releases as a way to point to test data
Browse files Browse the repository at this point in the history
  • Loading branch information
bw4sz committed Aug 27, 2024
1 parent 5bea98b commit 492ccb9
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 9 deletions.
40 changes: 40 additions & 0 deletions data_prep/bamberg_mini.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import pandas as pd
import os
import shutil
import zipfile

# Build the "Bamberg mini" sample: keep one image per tile, write the
# annotations for those images, copy the images, and zip everything up.

# Full annotation table this script samples from.
df = pd.read_csv("/orange/ewhite/DeepForest/Troles_Bamberg/coco2048/annotations/annotations.csv")


def _tile_name(path):
    """Return the file's basename with its last '_'-separated token removed."""
    tokens = os.path.basename(path).split('_')
    return '_'.join(tokens[:-1])


# Tag every annotation row with the tile its image belongs to.
df['tile_name'] = df['image_path'].map(_tile_name)

# One representative row (and therefore one image) per tile.
first_per_tile = df.groupby('tile_name').first()
df_unique_tiles = first_per_tile.reset_index()

# Keep every annotation that belongs to one of the representative images.
in_sample = df["image_path"].isin(df_unique_tiles["image_path"])
df_mini = df.loc[in_sample]

# Where the images come from and where the mini dataset is assembled.
source_dir = "/orange/ewhite/DeepForest/Troles_Bamberg/coco2048/images"
target_dir = "/orange/ewhite/DeepForest/Troles_Bamberg/Bamberg_mini/images"
annotations_csv = "/orange/ewhite/DeepForest/Troles_Bamberg/Bamberg_mini/annotations.csv"

# Creating the images directory also creates its parent, which must exist
# before the annotations file is written into it.
os.makedirs(target_dir, exist_ok=True)
df_mini.to_csv(annotations_csv, index=False)

# Copy each selected image once; the grouped index holds the image paths.
for image_name in df_unique_tiles.groupby("image_path").first().index:
    copy_from = os.path.join(source_dir, image_name)
    copy_to = os.path.join(target_dir, image_name)
    try:
        shutil.copyfile(copy_from, copy_to)
    except shutil.SameFileError:
        # Source and destination are the same file: nothing to copy.
        pass

# Bundle the annotations and the copied images into a single archive.
zip_filename = "/orange/ewhite/DeepForest/Troles_Bamberg/Bamberg_mini/images.zip"

with zipfile.ZipFile(zip_filename, 'w') as archive:
    archive.write(annotations_csv, arcname="annotations.csv")
    for image_name in df_unique_tiles['image_path']:
        # Store each image under its own name rather than its absolute path.
        archive.write(os.path.join(target_dir, image_name), arcname=image_name)
10 changes: 4 additions & 6 deletions data_prep/collect_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,15 @@
if not os.path.exists(destination + os.path.basename(image)):
shutil.copy(image, destination)

"""
for image in TreePolygons_datasets.filename.unique():
destination = "/orange/ewhite/DeepForest/MillionTrees/TreePolygons_v0.0/images/"
if not os.path.exists(destination + os.path.basename(image)):
shutil.copy(image, destination)
"""

# change filenames to correct absolute path
TreeBoxes_datasets.filename = "/orange/ewhite/DeepForest/MillionTrees/TreeBoxes_v0.0/images/" + TreeBoxes_datasets.filename.str.split("/").str[-1]
TreePoints_datasets.filename = "/orange/ewhite/DeepForest/MillionTrees/TreePoints_v0.0/images/" + TreePoints_datasets.filename.str.split("/").str[-1]
TreePolygons_datasets.filename = "/orange/ewhite/DeepForest/MillionTrees/TreePolygons_v0.0/images/" + TreePolygons_datasets.filename.str.split("/").str[-1]
# change filenames to relative path
TreeBoxes_datasets["filename"] = TreeBoxes_datasets["filename"].apply(os.path.basename)
TreePoints_datasets["filename"] = TreePoints_datasets["filename"].apply(os.path.basename)
TreePolygons_datasets["filename"] = TreePolygons_datasets["filename"].apply(os.path.basename)

# Save splits
TreePolygons_datasets.to_csv("/orange/ewhite/DeepForest/MillionTrees/TreePolygons_v0.0/official.csv", index=False)
Expand Down
2 changes: 1 addition & 1 deletion milliontrees/datasets/TreeBoxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class TreeBoxesDataset(MillionTreesDataset):
_versions_dict = {
'0.0': {
'download_url':
'https://www.dropbox.com/scl/fi/1qv5ymx54r89qw54nj49k/TreeBoxes_v0.0.zip?rlkey=64dxo9y7wmdq9vehs7odakavy&dl=0',
'https://github.com/weecology/MillionTrees/releases/download/0.0.0-alpha/TreeBoxes_v0.0.zip',
'compressed_size':
5940337
}
Expand Down
2 changes: 1 addition & 1 deletion milliontrees/datasets/TreePoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class TreePointsDataset(MillionTreesDataset):
_versions_dict = {
'0.0': {
'download_url':
'https://www.dropbox.com/scl/fi/csqdtsps3thltrmbc2amx/TreePoints_v0.0.zip?rlkey=s8ycx5ssh14u2a5amiz0dx3ks&dl=0',
'https://github.com/weecology/MillionTrees/releases/download/0.0.0-alpha/TreePoints_v0.0.zip',
'compressed_size':
523312564
}
Expand Down
2 changes: 1 addition & 1 deletion milliontrees/datasets/TreePolygons.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class TreePolygonsDataset(MillionTreesDataset):
_versions_dict = {
'0.0': {
'download_url':
'https://www.dropbox.com/scl/fi/81ost5jvsp7cb8br02mm4/TreePolygons_v0.0.zip?rlkey=cu1u1r6s1qftvedkgl3wo7bji&dl=0',
'https://github.com/weecology/MillionTrees/releases/download/0.0.0-alpha/TreePolygons_v0.0.zip',
'compressed_size':
17112645
}
Expand Down

0 comments on commit 492ccb9

Please sign in to comment.