Skip to content

Commit

Permalink
Raster names are stored in RasterFileDictionary, no more duplicated strings
Browse files Browse the repository at this point in the history
  • Loading branch information
elidwa committed Oct 25, 2024
1 parent e7ef869 commit 53c940b
Show file tree
Hide file tree
Showing 22 changed files with 588 additions and 312 deletions.
5 changes: 3 additions & 2 deletions clients/python/tests/test_arcticdem.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,9 @@ def test_indexed_raster(self, init):
"samples": {"strips": {"asset": "arcticdem-strips", "with_flags": True}} }
gdf = icesat2.atl06p(parms, resources=['ATL03_20191108234307_06580503_005_01.h5'])
assert init
assert len(gdf.attrs['file_directory']) == 32
for file_id in range(16):
assert len(gdf.attrs['file_directory']) == 16
for file_id in range(0, 16, 2):
assert file_id in gdf.attrs['file_directory'].keys()
assert '/pgc-opendata-dems/arcticdem/strips/' in gdf.attrs['file_directory'][file_id]
assert '_dem.tif' in gdf.attrs['file_directory'][file_id] # only dems, no flags

11 changes: 5 additions & 6 deletions datasets/gebco/package/GebcoBathyRaster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ bool GebcoBathyRaster::findRasters(raster_finder_t* finder)
if (!rastergeo->Intersects(geo)) continue;

rasters_group_t* rgroup = new rasters_group_t;
rgroup->featureId = feature->GetFieldAsString("id");
rgroup->gpsTime = getGmtDate(feature, DATE_TAG, rgroup->gmtDate);

const char* dataFile = feature->GetFieldAsString("data_raster");
Expand All @@ -95,7 +94,7 @@ bool GebcoBathyRaster::findRasters(raster_finder_t* finder)
raster_info_t rinfo;
rinfo.dataIsElevation = true;
rinfo.tag = VALUE_TAG;
rinfo.fileName = filePath + "/" + dataFile;
rinfo.fileId = finder->fileDict.add(filePath + "/" + dataFile);
rgroup->infovect.push_back(rinfo);
}

Expand All @@ -106,16 +105,16 @@ bool GebcoBathyRaster::findRasters(raster_finder_t* finder)
{
raster_info_t rinfo;
rinfo.dataIsElevation = false;
rinfo.tag = FLAGS_TAG;
rinfo.fileName = filePath + "/" + flagsFile;
rinfo.tag = FLAGS_TAG;
rinfo.fileId = finder->fileDict.add(filePath + "/" + flagsFile);
rgroup->infovect.push_back(rinfo);
}
}
rgroup->infovect.shrink_to_fit();

mlog(DEBUG, "Added group: %s with %ld rasters", rgroup->featureId.c_str(), rgroup->infovect.size());
mlog(DEBUG, "Added group with %ld rasters", rgroup->infovect.size());
for(unsigned j = 0; j < rgroup->infovect.size(); j++)
mlog(DEBUG, " %s", rgroup->infovect[j].fileName.c_str());
mlog(DEBUG, " %s", finder->fileDict.get(rgroup->infovect[j].fileId));

// Add the group
finder->rasterGroups.push_back(rgroup);
Expand Down
21 changes: 11 additions & 10 deletions datasets/landsat/package/LandsatHlsRaster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ bool LandsatHlsRaster::findRasters(raster_finder_t* finder)

/* Set raster group time and group featureId */
rasters_group_t* rgroup = new rasters_group_t;
rgroup->featureId = feature->GetFieldAsString("id");
rgroup->featureId = StringLib::duplicate(feature->GetFieldAsString("id"));
rgroup->gpsTime = getGmtDate(feature, DATE_TAG, rgroup->gmtDate);

/* Find each requested band in the index file */
Expand All @@ -188,7 +188,7 @@ bool LandsatHlsRaster::findRasters(raster_finder_t* finder)

raster_info_t rinfo;
rinfo.dataIsElevation = false; /* All bands are not elevation */
rinfo.fileName = filePath + fileName.substr(pos);
rinfo.fileId = finder->fileDict.add(filePath + fileName.substr(pos));

if(strcmp(bandName, "Fmask") == 0)
{
Expand All @@ -207,7 +207,7 @@ bool LandsatHlsRaster::findRasters(raster_finder_t* finder)
}
}

// mlog(DEBUG, "Added group: %s with %ld rasters", rgroup->featureId.c_str(), rgroup->infovect.size());
// mlog(DEBUG, "Added group: %s with %ld rasters", rgroup->featureId, rgroup->infovect.size());
finder->rasterGroups.push_back(rgroup);
}
// mlog(DEBUG, "Found %ld raster groups", finder->rasterGroups.size());
Expand Down Expand Up @@ -274,10 +274,11 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr
bool isS2 = false;
std::size_t pos;

pos = rgroup->featureId.find("HLS.L30");
const std::string featureId = rgroup->featureId;
pos = featureId.find("HLS.L30");
if(pos != std::string::npos) isL8 = true;

pos = rgroup->featureId.find("HLS.S30");
pos = featureId.find("HLS.S30");
if(pos != std::string::npos) isS2 = true;

if(!isL8 && !isS2)
Expand All @@ -295,7 +296,7 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr
{
for(const auto& rinfo : rgroup->infovect)
{
const char* key = rinfo.fileName.c_str();
const char* key = fileDictGet(rinfo.fileId);
cacheitem_t* item;
if(cache.find(key, &item))
{
Expand Down Expand Up @@ -407,12 +408,12 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr
}

const double groupTime = rgroup->gpsTime / 1000;
const std::string groupName = rgroup->featureId + " {\"algo\": \"";
const std::string groupName = featureId + " {\"algo\": \"";

/* Calculate algos - make sure that all the necessary bands were read */
if(ndsi)
{
RasterSample* sample = new RasterSample(groupTime, fileDictAdd(groupName + "NDSI\"}"));
RasterSample* sample = new RasterSample(groupTime, fileDict.add(groupName + "NDSI\"}"));
if((green != invalid) && (swir16 != invalid))
sample->value = (green - swir16) / (green + swir16);
else sample->value = invalid;
Expand All @@ -421,7 +422,7 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr

if(ndvi)
{
RasterSample* sample = new RasterSample(groupTime, fileDictAdd(groupName + "NDVI\"}"));
RasterSample* sample = new RasterSample(groupTime, fileDict.add(groupName + "NDVI\"}"));
if((red != invalid) && (nir08 != invalid))
sample->value = (nir08 - red) / (nir08 + red);
else sample->value = invalid;
Expand All @@ -430,7 +431,7 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr

if(ndwi)
{
RasterSample* sample = new RasterSample(groupTime, fileDictAdd(groupName + "NDWI\"}"));
RasterSample* sample = new RasterSample(groupTime, fileDict.add(groupName + "NDWI\"}"));
if((nir08 != invalid) && (swir16 != invalid))
sample->value = (nir08 - swir16) / (nir08 + swir16);
else sample->value = invalid;
Expand Down
13 changes: 6 additions & 7 deletions datasets/pgc/package/PgcDemStripsRaster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ bool PgcDemStripsRaster::findRasters(raster_finder_t* finder)
raster_info_t demRinfo;
demRinfo.dataIsElevation = true;
demRinfo.tag = VALUE_TAG;
demRinfo.fileName = fileName;
demRinfo.fileId = finder->fileDict.add(fileName);

/* bitmask raster, ie flags_file */
if(parms->flags_file)
Expand All @@ -242,13 +242,12 @@ bool PgcDemStripsRaster::findRasters(raster_finder_t* finder)
}
else fileName.clear();

raster_info_t flagsRinfo;
flagsRinfo.dataIsElevation = false;
flagsRinfo.tag = FLAGS_TAG;
flagsRinfo.fileName = fileName;

if(!flagsRinfo.fileName.empty())
if(!fileName.empty())
{
raster_info_t flagsRinfo;
flagsRinfo.dataIsElevation = false;
flagsRinfo.tag = FLAGS_TAG;
flagsRinfo.fileId = finder->fileDict.add(fileName);
rgroup->infovect.push_back(flagsRinfo);
}
}
Expand Down
5 changes: 2 additions & 3 deletions datasets/usgs3dep/package/Usgs3dep1meterDemRaster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ bool Usgs3dep1meterDemRaster::findRasters(raster_finder_t* finder)
if (!rastergeo->Intersects(geo)) continue;

rasters_group_t* rgroup = new rasters_group_t;
rgroup->featureId = feature->GetFieldAsString("id");
rgroup->gpsTime = getGmtDate(feature, DATE_TAG, rgroup->gmtDate);

const char* fname = feature->GetFieldAsString("url");
Expand All @@ -124,11 +123,11 @@ bool Usgs3dep1meterDemRaster::findRasters(raster_finder_t* finder)
raster_info_t rinfo;
rinfo.dataIsElevation = true;
rinfo.tag = VALUE_TAG;
rinfo.fileName = filePath + fileName.substr(pos);
rinfo.fileId = finder->fileDict.add(filePath + fileName.substr(pos));
rgroup->infovect.push_back(rinfo);
}

// mlog(DEBUG, "Added group: %s with %ld rasters", rgroup->featureId.c_str(), rgroup->infovect.size());
// mlog(DEBUG, "Added group with %ld rasters", rgroup->infovect.size());
finder->rasterGroups.push_back(rgroup);
}
// mlog(DEBUG, "Found %ld raster groups", finder->rasterGroups.size());
Expand Down
1 change: 0 additions & 1 deletion packages/arrow/ArrowSampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,6 @@ void* ArrowSampler::mainThread(void* parm)

/* Release since not needed anymore */
sampler->samples.clear();
sampler->file_ids.clear();
}

try
Expand Down
1 change: 0 additions & 1 deletion packages/arrow/ArrowSampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ class ArrowSampler: public LuaObject
RasterObject* robj;
ArrowSampler* obj;
List<sample_list_t*> samples;
std::set<uint64_t> file_ids;
std::vector<std::pair<uint64_t, const char*>> filemap;

explicit BatchSampler (const char* _rkey, RasterObject* _robj, ArrowSampler* _obj);
Expand Down
29 changes: 5 additions & 24 deletions packages/arrow/ArrowSamplerImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,26 +135,13 @@ bool ArrowSamplerImpl::processSamples(ArrowSampler::batch_sampler_t* sampler)
if(status)
{
/* Create raster file map <id, filename> */
Dictionary<uint64_t>::Iterator iterator(sampler->robj->fileDictGet());
for(int i = 0; i < iterator.length; i++)
const std::set<uint64_t> &sampleIds = sampler->robj->fileDictGetSampleIds();
for(std::set<uint64_t>::const_iterator it = sampleIds.begin(); it != sampleIds.end(); it++)
{
const char* name = iterator[i].key;
const uint64_t id = iterator[i].value;

/* For some data sets, dictionary contains quality mask rasters in addition to data rasters.
* Only add rasters with id present in the samples
*/
if(sampler->file_ids.find(id) != sampler->file_ids.end())
{
sampler->filemap.emplace_back(id, name);
}
const uint64_t fileId = *it;
const char* name = sampler->robj->fileDictGet(fileId);
sampler->filemap.emplace_back(fileId, name);
}

/* Sort the map with increasing file id */
std::sort(sampler->filemap.begin(), sampler->filemap.end(),
[](const std::pair<uint64_t, std::string>& a, const std::pair<uint64_t, std::string>& b)
{ return a.first < b.first; });

}
else
{
Expand Down Expand Up @@ -534,9 +521,6 @@ bool ArrowSamplerImpl::makeColumnsWithLists(ArrowSampler::batch_sampler_t* sampl
PARQUET_THROW_NOT_OK(stdev_builder->Append(sample->stats.stdev));
PARQUET_THROW_NOT_OK(mad_builder->Append(sample->stats.mad));
}

/* Collect all fileIds used by samples - duplicates are ignored */
sampler->file_ids.insert(sample->fileId);
}
}

Expand Down Expand Up @@ -686,9 +670,6 @@ bool ArrowSamplerImpl::makeColumnsWithOneSample(ArrowSampler::batch_sampler_t* s
PARQUET_THROW_NOT_OK(stdev_builder.Append(sample->stats.stdev));
PARQUET_THROW_NOT_OK(mad_builder.Append(sample->stats.mad));
}

/* Collect all fileIds used by samples - duplicates are ignored */
sampler->file_ids.insert(sample->fileId);
}

/* Finish the builders */
Expand Down
2 changes: 2 additions & 0 deletions packages/geo/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ if (GDAL_FOUND AND PROJ_FOUND AND TIFF_FOUND)
${CMAKE_CURRENT_LIST_DIR}/RasterObject.cpp
${CMAKE_CURRENT_LIST_DIR}/RasterSampler.cpp
${CMAKE_CURRENT_LIST_DIR}/RasterSubset.cpp
${CMAKE_CURRENT_LIST_DIR}/RasterFileDictionary.cpp
${CMAKE_CURRENT_LIST_DIR}/GeoFields.cpp
${CMAKE_CURRENT_LIST_DIR}/GeoLib.cpp
${CMAKE_CURRENT_LIST_DIR}/GeoRtree.cpp
Expand All @@ -61,6 +62,7 @@ if (GDAL_FOUND AND PROJ_FOUND AND TIFF_FOUND)
${CMAKE_CURRENT_LIST_DIR}/RasterSampler.h
${CMAKE_CURRENT_LIST_DIR}/RasterSample.h
${CMAKE_CURRENT_LIST_DIR}/RasterSubset.h
${CMAKE_CURRENT_LIST_DIR}/RasterFileDictionary.h
${CMAKE_CURRENT_LIST_DIR}/GeoFields.cpp
${CMAKE_CURRENT_LIST_DIR}/GeoLib.h
${CMAKE_CURRENT_LIST_DIR}/GeoRtree.h
Expand Down
Loading

0 comments on commit 53c940b

Please sign in to comment.