From 53c940bf62a3409e1ecce879a46e3cdc79ac28f4 Mon Sep 17 00:00:00 2001 From: Eric Lidwa Date: Fri, 25 Oct 2024 21:43:14 +0000 Subject: [PATCH] Raster names are stored in RasterFileDictionary, no more duplicated strings --- clients/python/tests/test_arcticdem.py | 5 +- datasets/gebco/package/GebcoBathyRaster.cpp | 11 +- datasets/landsat/package/LandsatHlsRaster.cpp | 21 +- datasets/pgc/package/PgcDemStripsRaster.cpp | 13 +- .../package/Usgs3dep1meterDemRaster.cpp | 5 +- packages/arrow/ArrowSampler.cpp | 1 - packages/arrow/ArrowSampler.h | 1 - packages/arrow/ArrowSamplerImpl.cpp | 29 +- packages/geo/CMakeLists.txt | 2 + packages/geo/GeoIndexedRaster.cpp | 107 ++++--- packages/geo/GeoIndexedRaster.h | 22 +- packages/geo/GeoRaster.cpp | 2 +- packages/geo/RasterFileDictionary.cpp | 152 ++++++++++ packages/geo/RasterFileDictionary.h | 89 ++++++ packages/geo/RasterObject.cpp | 281 +++++++----------- packages/geo/RasterObject.h | 55 ++-- packages/geo/RasterSampler.cpp | 12 +- packages/geo/UT_RasterSample.cpp | 77 ++++- packages/geo/UT_RasterSample.h | 1 + packages/geo/UT_RasterSubset.cpp | 2 +- .../parquet_sampler_arcticdem_perf_test.lua | 4 +- .../parquet_sampler_landsat_perf_test.lua | 8 +- 22 files changed, 588 insertions(+), 312 deletions(-) create mode 100644 packages/geo/RasterFileDictionary.cpp create mode 100644 packages/geo/RasterFileDictionary.h diff --git a/clients/python/tests/test_arcticdem.py b/clients/python/tests/test_arcticdem.py index ded594986..4069d88c5 100644 --- a/clients/python/tests/test_arcticdem.py +++ b/clients/python/tests/test_arcticdem.py @@ -113,8 +113,9 @@ def test_indexed_raster(self, init): "samples": {"strips": {"asset": "arcticdem-strips", "with_flags": True}} } gdf = icesat2.atl06p(parms, resources=['ATL03_20191108234307_06580503_005_01.h5']) assert init - assert len(gdf.attrs['file_directory']) == 32 - for file_id in range(16): + assert len(gdf.attrs['file_directory']) == 16 + for file_id in range(0, 16, 2): assert file_id in gdf.attrs['file_directory'].keys() assert '/pgc-opendata-dems/arcticdem/strips/' in gdf.attrs['file_directory'][file_id] + assert '_dem.tif' in gdf.attrs['file_directory'][file_id] # only dems, no flags diff --git a/datasets/gebco/package/GebcoBathyRaster.cpp b/datasets/gebco/package/GebcoBathyRaster.cpp index fbcec958d..ac35cb7c3 100644 --- a/datasets/gebco/package/GebcoBathyRaster.cpp +++ b/datasets/gebco/package/GebcoBathyRaster.cpp @@ -86,7 +86,6 @@ bool GebcoBathyRaster::findRasters(raster_finder_t* finder) if (!rastergeo->Intersects(geo)) continue; rasters_group_t* rgroup = new rasters_group_t; - rgroup->featureId = feature->GetFieldAsString("id"); rgroup->gpsTime = getGmtDate(feature, DATE_TAG, rgroup->gmtDate); const char* dataFile = feature->GetFieldAsString("data_raster"); @@ -95,7 +94,7 @@ bool GebcoBathyRaster::findRasters(raster_finder_t* finder) raster_info_t rinfo; rinfo.dataIsElevation = true; rinfo.tag = VALUE_TAG; - rinfo.fileName = filePath + "/" + dataFile; + rinfo.fileId = finder->fileDict.add(filePath + "/" + dataFile); rgroup->infovect.push_back(rinfo); } @@ -106,16 +105,16 @@ bool GebcoBathyRaster::findRasters(raster_finder_t* finder) { raster_info_t rinfo; rinfo.dataIsElevation = false; - rinfo.tag = FLAGS_TAG; - rinfo.fileName = filePath + "/" + flagsFile; + rinfo.tag = FLAGS_TAG; + rinfo.fileId = finder->fileDict.add(filePath + "/" + flagsFile); rgroup->infovect.push_back(rinfo); } } rgroup->infovect.shrink_to_fit(); - mlog(DEBUG, "Added group: %s with %ld rasters", rgroup->featureId.c_str(), rgroup->infovect.size()); + mlog(DEBUG, "Added group with %ld rasters", rgroup->infovect.size()); for(unsigned j = 0; j < rgroup->infovect.size(); j++) - mlog(DEBUG, " %s", rgroup->infovect[j].fileName.c_str()); + mlog(DEBUG, " %s", finder->fileDict.get(rgroup->infovect[j].fileId)); // Add the group finder->rasterGroups.push_back(rgroup); diff --git a/datasets/landsat/package/LandsatHlsRaster.cpp b/datasets/landsat/package/LandsatHlsRaster.cpp index f3ca8880f..c62847077 100644 --- a/datasets/landsat/package/LandsatHlsRaster.cpp +++ b/datasets/landsat/package/LandsatHlsRaster.cpp @@ -168,7 +168,7 @@ bool LandsatHlsRaster::findRasters(raster_finder_t* finder) /* Set raster group time and group featureId */ rasters_group_t* rgroup = new rasters_group_t; - rgroup->featureId = feature->GetFieldAsString("id"); + rgroup->featureId = StringLib::duplicate(feature->GetFieldAsString("id")); rgroup->gpsTime = getGmtDate(feature, DATE_TAG, rgroup->gmtDate); /* Find each requested band in the index file */ @@ -188,7 +188,7 @@ bool LandsatHlsRaster::findRasters(raster_finder_t* finder) raster_info_t rinfo; rinfo.dataIsElevation = false; /* All bands are not elevation */ - rinfo.fileName = filePath + fileName.substr(pos); + rinfo.fileId = finder->fileDict.add(filePath + fileName.substr(pos)); if(strcmp(bandName, "Fmask") == 0) { @@ -207,7 +207,7 @@ bool LandsatHlsRaster::findRasters(raster_finder_t* finder) } } - // mlog(DEBUG, "Added group: %s with %ld rasters", rgroup->featureId.c_str(), rgroup->infovect.size()); + // mlog(DEBUG, "Added group: %s with %ld rasters", rgroup->featureId, rgroup->infovect.size()); finder->rasterGroups.push_back(rgroup); } // mlog(DEBUG, "Found %ld raster groups", finder->rasterGroups.size()); @@ -274,10 +274,11 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr bool isS2 = false; std::size_t pos; - pos = rgroup->featureId.find("HLS.L30"); + const std::string featureId = rgroup->featureId; + pos = featureId.find("HLS.L30"); if(pos != std::string::npos) isL8 = true; - pos = rgroup->featureId.find("HLS.S30"); + pos = featureId.find("HLS.S30"); if(pos != std::string::npos) isS2 = true; if(!isL8 && !isS2) @@ -295,7 +296,7 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr { for(const auto& rinfo : rgroup->infovect) { - const char* key = rinfo.fileName.c_str(); + const char* key = fileDictGet(rinfo.fileId); cacheitem_t* item; if(cache.find(key, &item)) { @@ -407,12 +408,12 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr } const double groupTime = rgroup->gpsTime / 1000; - const std::string groupName = rgroup->featureId + " {\"algo\": \""; + const std::string groupName = featureId + " {\"algo\": \""; /* Calculate algos - make sure that all the necessary bands were read */ if(ndsi) { - RasterSample* sample = new RasterSample(groupTime, fileDictAdd(groupName + "NDSI\"}")); + RasterSample* sample = new RasterSample(groupTime, fileDict.add(groupName + "NDSI\"}")); if((green != invalid) && (swir16 != invalid)) sample->value = (green - swir16) / (green + swir16); else sample->value = invalid; @@ -421,7 +422,7 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr if(ndvi) { - RasterSample* sample = new RasterSample(groupTime, fileDictAdd(groupName + "NDVI\"}")); + RasterSample* sample = new RasterSample(groupTime, fileDict.add(groupName + "NDVI\"}")); if((red != invalid) && (nir08 != invalid)) sample->value = (nir08 - red) / (nir08 + red); else sample->value = invalid; @@ -430,7 +431,7 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr if(ndwi) { - RasterSample* sample = new RasterSample(groupTime, fileDictAdd(groupName + "NDWI\"}")); + RasterSample* sample = new RasterSample(groupTime, fileDict.add(groupName + "NDWI\"}")); if((nir08 != invalid) && (swir16 != invalid)) sample->value = (nir08 - swir16) / (nir08 + swir16); else sample->value = invalid; diff --git a/datasets/pgc/package/PgcDemStripsRaster.cpp b/datasets/pgc/package/PgcDemStripsRaster.cpp index 0646ed675..c434c9651 100644 --- a/datasets/pgc/package/PgcDemStripsRaster.cpp +++ b/datasets/pgc/package/PgcDemStripsRaster.cpp @@ -228,7 +228,7 @@ bool PgcDemStripsRaster::findRasters(raster_finder_t* finder) raster_info_t demRinfo; demRinfo.dataIsElevation = true; demRinfo.tag = VALUE_TAG; - demRinfo.fileName = fileName; + demRinfo.fileId = finder->fileDict.add(fileName); /* bitmask raster, ie flags_file */ if(parms->flags_file) @@ -242,13 +242,12 @@ bool PgcDemStripsRaster::findRasters(raster_finder_t* finder) } else fileName.clear(); - raster_info_t flagsRinfo; - flagsRinfo.dataIsElevation = false; - flagsRinfo.tag = FLAGS_TAG; - flagsRinfo.fileName = fileName; - - if(!flagsRinfo.fileName.empty()) + if(!fileName.empty()) { + raster_info_t flagsRinfo; + flagsRinfo.dataIsElevation = false; + flagsRinfo.tag = FLAGS_TAG; + flagsRinfo.fileId = finder->fileDict.add(fileName); rgroup->infovect.push_back(flagsRinfo); } } diff --git a/datasets/usgs3dep/package/Usgs3dep1meterDemRaster.cpp b/datasets/usgs3dep/package/Usgs3dep1meterDemRaster.cpp index 5839ea058..3a65aef58 100644 --- a/datasets/usgs3dep/package/Usgs3dep1meterDemRaster.cpp +++ b/datasets/usgs3dep/package/Usgs3dep1meterDemRaster.cpp @@ -112,7 +112,6 @@ bool Usgs3dep1meterDemRaster::findRasters(raster_finder_t* finder) if (!rastergeo->Intersects(geo)) continue; rasters_group_t* rgroup = new rasters_group_t; - rgroup->featureId = feature->GetFieldAsString("id"); rgroup->gpsTime = getGmtDate(feature, DATE_TAG, rgroup->gmtDate); const char* fname = feature->GetFieldAsString("url"); @@ -124,11 +123,11 @@ bool Usgs3dep1meterDemRaster::findRasters(raster_finder_t* finder) raster_info_t rinfo; rinfo.dataIsElevation = true; rinfo.tag = VALUE_TAG; - rinfo.fileName = filePath + fileName.substr(pos); + rinfo.fileId = finder->fileDict.add(filePath + fileName.substr(pos)); rgroup->infovect.push_back(rinfo); } - // mlog(DEBUG, "Added group: %s with %ld rasters", rgroup->featureId.c_str(), rgroup->infovect.size()); + // mlog(DEBUG, "Added group with %ld rasters", rgroup->infovect.size()); finder->rasterGroups.push_back(rgroup); } // mlog(DEBUG, "Found %ld raster groups", finder->rasterGroups.size()); diff --git a/packages/arrow/ArrowSampler.cpp b/packages/arrow/ArrowSampler.cpp index e806cf5ee..f52e2fe3e 100644 --- a/packages/arrow/ArrowSampler.cpp +++ b/packages/arrow/ArrowSampler.cpp @@ -182,7 +182,6 @@ void* ArrowSampler::mainThread(void* parm) /* Release since not needed anymore */ sampler->samples.clear(); - sampler->file_ids.clear(); } try diff --git a/packages/arrow/ArrowSampler.h b/packages/arrow/ArrowSampler.h index 93160634a..086470d9f 100644 --- a/packages/arrow/ArrowSampler.h +++ b/packages/arrow/ArrowSampler.h @@ -91,7 +91,6 @@ class ArrowSampler: public LuaObject RasterObject* robj; ArrowSampler* obj; List samples; - std::set file_ids; std::vector> filemap; explicit BatchSampler (const char* _rkey, RasterObject* _robj, ArrowSampler* _obj); diff --git a/packages/arrow/ArrowSamplerImpl.cpp b/packages/arrow/ArrowSamplerImpl.cpp index 1eef5baf3..11f47789e 100644 --- a/packages/arrow/ArrowSamplerImpl.cpp +++ b/packages/arrow/ArrowSamplerImpl.cpp @@ -135,26 +135,13 @@ bool ArrowSamplerImpl::processSamples(ArrowSampler::batch_sampler_t* sampler) if(status) { /* Create raster file map */ - Dictionary::Iterator iterator(sampler->robj->fileDictGet()); - for(int i = 0; i < iterator.length; i++) + const std::set &sampleIds = sampler->robj->fileDictGetSampleIds(); + for(std::set::const_iterator it = sampleIds.begin(); it != sampleIds.end(); it++) { - const char* name = iterator[i].key; - const uint64_t id = iterator[i].value; - - /* For some data sets, dictionary contains quality mask rasters in addition to data rasters. - * Only add rasters with id present in the samples - */ - if(sampler->file_ids.find(id) != sampler->file_ids.end()) - { - sampler->filemap.emplace_back(id, name); - } + const uint64_t fileId = *it; + const char* name = sampler->robj->fileDictGet(fileId); + sampler->filemap.emplace_back(fileId, name); } - - /* Sort the map with increasing file id */ - std::sort(sampler->filemap.begin(), sampler->filemap.end(), - [](const std::pair& a, const std::pair& b) - { return a.first < b.first; }); - } else { @@ -534,9 +521,6 @@ bool ArrowSamplerImpl::makeColumnsWithLists(ArrowSampler::batch_sampler_t* sampl PARQUET_THROW_NOT_OK(stdev_builder->Append(sample->stats.stdev)); PARQUET_THROW_NOT_OK(mad_builder->Append(sample->stats.mad)); } - - /* Collect all fileIds used by samples - duplicates are ignored */ - sampler->file_ids.insert(sample->fileId); } } @@ -686,9 +670,6 @@ bool ArrowSamplerImpl::makeColumnsWithOneSample(ArrowSampler::batch_sampler_t* s PARQUET_THROW_NOT_OK(stdev_builder.Append(sample->stats.stdev)); PARQUET_THROW_NOT_OK(mad_builder.Append(sample->stats.mad)); } - - /* Collect all fileIds used by samples - duplicates are ignored */ - sampler->file_ids.insert(sample->fileId); } /* Finish the builders */ diff --git a/packages/geo/CMakeLists.txt b/packages/geo/CMakeLists.txt index 1b9a7455a..88a6a5ad7 100644 --- a/packages/geo/CMakeLists.txt +++ b/packages/geo/CMakeLists.txt @@ -36,6 +36,7 @@ if (GDAL_FOUND AND PROJ_FOUND AND TIFF_FOUND) ${CMAKE_CURRENT_LIST_DIR}/RasterObject.cpp ${CMAKE_CURRENT_LIST_DIR}/RasterSampler.cpp ${CMAKE_CURRENT_LIST_DIR}/RasterSubset.cpp + ${CMAKE_CURRENT_LIST_DIR}/RasterFileDictionary.cpp ${CMAKE_CURRENT_LIST_DIR}/GeoFields.cpp ${CMAKE_CURRENT_LIST_DIR}/GeoLib.cpp ${CMAKE_CURRENT_LIST_DIR}/GeoRtree.cpp @@ -61,6 +62,7 @@ if (GDAL_FOUND AND PROJ_FOUND AND TIFF_FOUND) ${CMAKE_CURRENT_LIST_DIR}/RasterSampler.h ${CMAKE_CURRENT_LIST_DIR}/RasterSample.h ${CMAKE_CURRENT_LIST_DIR}/RasterSubset.h + ${CMAKE_CURRENT_LIST_DIR}/RasterFileDictionary.h ${CMAKE_CURRENT_LIST_DIR}/GeoFields.cpp ${CMAKE_CURRENT_LIST_DIR}/GeoLib.h ${CMAKE_CURRENT_LIST_DIR}/GeoRtree.h diff --git a/packages/geo/GeoIndexedRaster.cpp b/packages/geo/GeoIndexedRaster.cpp index c1572fd37..8f0e6570a 100644 --- a/packages/geo/GeoIndexedRaster.cpp +++ b/packages/geo/GeoIndexedRaster.cpp @@ -92,9 +92,12 @@ GeoIndexedRaster::Reader::~Reader (void) /*---------------------------------------------------------------------------- * RasterFinder Constructor *----------------------------------------------------------------------------*/ -GeoIndexedRaster::RasterFinder::RasterFinder (const OGRGeometry* _geo, const std::vector* _featuresList): +GeoIndexedRaster::RasterFinder::RasterFinder (const OGRGeometry* _geo, + const std::vector* _featuresList, + RasterFileDictionary& _fileDict): geo(_geo), - featuresList(_featuresList) + featuresList(_featuresList), + fileDict(_fileDict) { } @@ -135,7 +138,7 @@ void GeoIndexedRaster::deinit (void) /*---------------------------------------------------------------------------- - * getSamples + * getSamples - serial sampling *----------------------------------------------------------------------------*/ uint32_t GeoIndexedRaster::getSamples(const MathLib::point_3d_t& point, int64_t gps, List& slist, void* param) { @@ -152,7 +155,7 @@ uint32_t GeoIndexedRaster::getSamples(const MathLib::point_3d_t& point, int64_t /* Sample Rasters */ if(sample(&ogrPoint, gps, &groupList)) { - /* Populate Return Vector of Samples (slist) */ + /* Populate Return List of Samples (slist) */ const GroupOrdering::Iterator iter(groupList); for(int i = 0; i < iter.length; i++) { @@ -166,6 +169,9 @@ uint32_t GeoIndexedRaster::getSamples(const MathLib::point_3d_t& point, int64_t getGroupSamples(rgroup, slist, flags); } } + + /* Update file dictionary */ + fileDictSetSamples(&slist); } catch (const RunTimeException &e) { @@ -195,7 +201,7 @@ uint32_t GeoIndexedRaster::getSamples(const MathLib::point_3d_t& point, int64_t } /*---------------------------------------------------------------------------- - * getSamples + * getSamples - batch sampling *----------------------------------------------------------------------------*/ uint32_t GeoIndexedRaster::getSamples(const std::vector& points, List& sllist, void* param) { @@ -204,10 +210,8 @@ uint32_t GeoIndexedRaster::getSamples(const std::vector& points, L lockSampling(); perfStats.clear(); - - /* Clear raster cache and file dictionary used by serialized getSamples */ - cache.clear(); - fileDictClear(); + cache.clear(); /* Clear cache used by serial sampling */ + fileDict.clear(); /* Start with empty file dictionary */ /* Vector of points and their associated raster groups */ std::vector pointsGroups; @@ -229,13 +233,13 @@ uint32_t GeoIndexedRaster::getSamples(const std::vector& points, L /* Rasters to points map */ raster_points_map_t rasterToPointsMap; - /* For all points from the caller, create a vector of raster group lists */ + /* For all points create a vector of raster group lists */ if(!findAllGroups(&points, pointsGroups, rasterToPointsMap)) { throw RunTimeException(CRITICAL, RTE_ERROR, "Error creating groups"); } - /* For all points from the caller, create a vector of unique rasters */ + /* For all points create a vector of unique rasters */ if(!findUniqueRasters(uniqueRasters, pointsGroups, rasterToPointsMap)) { throw RunTimeException(CRITICAL, RTE_ERROR, "Error finding unique rasters"); @@ -244,7 +248,7 @@ uint32_t GeoIndexedRaster::getSamples(const std::vector& points, L /* rastersToPointsMap is no longer needed */ } - /* For all unique rasters, sample them */ + /* Sample all unique rasters */ if(!sampleUniqueRasters(uniqueRasters)) { throw RunTimeException(CRITICAL, RTE_ERROR, "Error sampling unique rasters"); @@ -472,7 +476,7 @@ void GeoIndexedRaster::getGroupSamples(const rasters_group_t* rgroup, Listinfovect) { - const char* key = rinfo.fileName.c_str(); + const char* key = fileDict.get(rinfo.fileId); cacheitem_t* item; if(cache.find(key, &item)) { @@ -532,7 +536,7 @@ uint32_t GeoIndexedRaster::getGroupFlags(const rasters_group_t* rgroup) if(strcmp(FLAGS_TAG, rinfo.tag.c_str()) != 0) continue; cacheitem_t* item; - const char* key = rinfo.fileName.c_str(); + const char* key = fileDict.get(rinfo.fileId); if(cache.find(key, &item)) { const RasterSample* _sample = item->sample; @@ -749,7 +753,7 @@ bool GeoIndexedRaster::sample(OGRGeometry* geo, int64_t gps, GroupOrdering* grou /* Query the R-tree with the OGRPoint and get the result features */ geoRtree.query(geo, foundFeatures); - raster_finder_t finder(geo, &foundFeatures); + raster_finder_t finder(geo, &foundFeatures, fileDict); if(!findRasters(&finder)) return false; @@ -760,7 +764,7 @@ bool GeoIndexedRaster::sample(OGRGeometry* geo, int64_t gps, GroupOrdering* grou groupList->add(groupList->length(), rgroup); } - if(!filterRasters(gps, groupList)) + if(!filterRasters(gps, groupList, fileDict)) return false; uint32_t rasters2sample = 0; @@ -954,7 +958,7 @@ void* GeoIndexedRaster::batchReaderThread(void *param) { unique_raster_t* ur = breader->uraster; GdalRaster* raster = new GdalRaster(breader->obj->parms, - ur->fileName, + breader->obj->fileDict.get(ur->fileId), 0, /* Sample collecting code will set it to group's gpsTime */ ur->fileId, ur->dataIsElevation, @@ -1012,22 +1016,22 @@ void* GeoIndexedRaster::groupsFinderThread(void *param) // mlog(DEBUG, "Found %zu features for point %u", foundFeatures.size(), i); /* Clone found features since OGRFeature is not thread safe */ - std::vector localFeatures; - localFeatures.reserve(foundFeatures.size()); + std::vector threadFeatures; + threadFeatures.reserve(foundFeatures.size()); for(OGRFeature* feature : foundFeatures) { - localFeatures.push_back(feature->Clone()); + threadFeatures.push_back(feature->Clone()); } /* Set finder for the found features */ - RasterFinder finder(&ogrPoint, &localFeatures); + RasterFinder finder(&ogrPoint, &threadFeatures, gf->threadFileDict); /* Find rasters intersecting with ogrPoint */ gf->obj->findRasters(&finder); /* Destroy cloned features */ - for(OGRFeature* feature : localFeatures) + for(OGRFeature* feature : threadFeatures) { OGRFeature::DestroyFeature(feature); } @@ -1041,7 +1045,7 @@ void* GeoIndexedRaster::groupsFinderThread(void *param) /* Filter rasters based on POI time */ const int64_t gps = gf->obj->usePOItime() ? pinfo.gps : 0.0; - gf->obj->filterRasters(gps, groupList); + gf->obj->filterRasters(gps, groupList, gf->threadFileDict); /* Add found rasters which passed the filter to pointsGroups */ gf->pointsGroups.emplace_back(point_groups_t{ogrPoint, i, groupList}); @@ -1053,7 +1057,8 @@ void* GeoIndexedRaster::groupsFinderThread(void *param) const rasters_group_t* rgroup = iter[j].value; for(const raster_info_t& rinfo : rgroup->infovect) { - gf->rasterToPointsMap[rinfo.fileName].insert(i); + const char* fileName = gf->threadFileDict.get(rinfo.fileId); + gf->rasterToPointsMap[fileName].insert(i); } } } @@ -1196,7 +1201,7 @@ bool GeoIndexedRaster::updateCache(uint32_t& rasters2sample, const GroupOrdering const rasters_group_t* rgroup = group_iter[i].value; for(const auto& rinfo : rgroup->infovect) { - const char* key = rinfo.fileName.c_str(); + const char* key = fileDict.get(rinfo.fileId); cacheitem_t* item; const bool inCache = cache.find(key, &item); if(!inCache) @@ -1205,9 +1210,9 @@ bool GeoIndexedRaster::updateCache(uint32_t& rasters2sample, const GroupOrdering note use of bbox in construcutor - it limits area of interest to the extent of vector index file */ item = new cacheitem_t; - item->raster = new GdalRaster(parms, rinfo.fileName, + item->raster = new GdalRaster(parms, key, static_cast(rgroup->gpsTime / 1000), - fileDictAdd(rinfo.fileName), + rinfo.fileId, rinfo.dataIsElevation, crscb, &bbox); item->sample = NULL; item->subset = NULL; @@ -1259,7 +1264,7 @@ bool GeoIndexedRaster::updateCache(uint32_t& rasters2sample, const GroupOrdering /*---------------------------------------------------------------------------- * filterRasters *----------------------------------------------------------------------------*/ -bool GeoIndexedRaster::filterRasters(int64_t gps, GroupOrdering* groupList) +bool GeoIndexedRaster::filterRasters(int64_t gps, GroupOrdering* groupList, RasterFileDictionary& dict) { /* NOTE: temporal filter is applied in openGeoIndex() */ if(!parms->url_substring.value.empty() || parms->filter_doy_range) @@ -1275,7 +1280,8 @@ bool GeoIndexedRaster::filterRasters(int64_t gps, GroupOrdering* groupList) /* URL filter */ if(!parms->url_substring.value.empty()) { - if(rinfo.fileName.find(parms->url_substring.value) == std::string::npos) + const std::string fileName = dict.get(rinfo.fileId); + if(fileName.find(parms->url_substring.value) == std::string::npos) { removeGroup = true; break; @@ -1468,14 +1474,32 @@ bool GeoIndexedRaster::findAllGroups(const std::vector* points, mlog(INFO, "All groups finders time: %lf", TimeLib::latchtime() - startTime); - /* Merge the pointGroups for each thread */ + /* Merge the pointGroups from each thread */ mlog(INFO, "Merging point groups from all threads"); for(GroupsFinder* gf : rgroupFinders) { - pointsGroups.insert(pointsGroups.end(), gf->pointsGroups.begin(), gf->pointsGroups.end()); + /* Threads used local file dictionary, combine them and update fileId */ + for(const point_groups_t& pg: gf->pointsGroups) + { + const GroupOrdering::Iterator iter(*pg.groupList); + for (int64_t i = 0; i < iter.length; i++) + { + rasters_group_t *rgroup = iter[i].value; + for (raster_info_t &rinfo : rgroup->infovect) + { + /* Get file from thread file dictionary */ + const std::string fileName = gf->threadFileDict.get(rinfo.fileId); + + /* Add to main file dictionary */ + rinfo.fileId = fileDict.add(fileName); + } + } + + pointsGroups.push_back(pg); + } /* Merge the rasterToPointsMap for each thread */ - for(const raster_points_map_t::value_type& pair : gf->rasterToPointsMap) + for (const raster_points_map_t::value_type &pair : gf->rasterToPointsMap) { rasterToPointsMap[pair.first].insert(pair.second.begin(), pair.second.end()); } @@ -1535,7 +1559,8 @@ bool GeoIndexedRaster::findUniqueRasters(std::vector& uniqueRa for(raster_info_t& rinfo : rgroup->infovect) { /* Is this raster already in the list of unique rasters? */ - auto it = fileIndexMap.find(rinfo.fileName); + const std::string fileName = fileDict.get(rinfo.fileId); + auto it = fileIndexMap.find(fileName); if(it != fileIndexMap.end()) { /* Raster is already in the vector of unique rasters, get index from map and update uraster pointer */ @@ -1544,15 +1569,14 @@ bool GeoIndexedRaster::findUniqueRasters(std::vector& uniqueRa else { /* Raster is not in the vector of unique rasters */ - unique_raster_t* ur = new unique_raster_t(rinfo.dataIsElevation, rinfo.fileName); - ur->fileId = fileDictAdd(rinfo.fileName); + unique_raster_t* ur = new unique_raster_t(rinfo.dataIsElevation, rinfo.fileId); uniqueRasters.push_back(ur); /* Set pointer in rinfo to new unique raster */ rinfo.uraster = ur; /* Update index map */ - fileIndexMap[rinfo.fileName] = uniqueRasters.size() - 1; + fileIndexMap[fileName] = uniqueRasters.size() - 1; } } } @@ -1562,7 +1586,7 @@ bool GeoIndexedRaster::findUniqueRasters(std::vector& uniqueRa mlog(DEBUG, "Finding points for unique rasters"); for(unique_raster_t* ur : uniqueRasters) { - auto it = rasterToPointsMap.find(ur->fileName); + auto it = rasterToPointsMap.find(fileDict.get(ur->fileId)); if(it != rasterToPointsMap.end()) { for(const uint32_t pointIndx : it->second) @@ -1742,9 +1766,12 @@ bool GeoIndexedRaster::collectSamples(const std::vector& pointsG for(SampleCollector* sc : sampleCollectors) { const std::vector& slvector = sc->slvector; - for(sample_list_t* sl : slvector) + for(sample_list_t* slist : slvector) { - sllist.add(sl); + /* Update file dictionary */ + fileDictSetSamples(slist); + + sllist.add(slist); } ssErrors |= sc->ssErrors; delete sc; diff --git a/packages/geo/GeoIndexedRaster.h b/packages/geo/GeoIndexedRaster.h index e7ad6a143..cfeae8017 100644 --- a/packages/geo/GeoIndexedRaster.h +++ b/packages/geo/GeoIndexedRaster.h @@ -94,8 +94,8 @@ class GeoIndexedRaster: public RasterObject /** Raster information needed for sampling */ typedef struct RasterInfo { bool dataIsElevation; - std::string tag; // "Value", "Flags", "Date" - std::string fileName; // Raster file name + std::string tag; // "Dem", "Flags", "Date" + uint64_t fileId; // file dictionary id UniqueRaster* uraster; // Pointer to the unique raster which contains the sample for this raster RasterInfo(void): dataIsElevation(false), uraster(NULL) {} @@ -103,22 +103,22 @@ class GeoIndexedRaster: public RasterObject /* Group of rasters belonging to the same geojson stac catalog feature */ typedef struct RaserGroup { - std::string featureId; // stac catalog feature id + char* featureId; // stac catalog feature id std::vector infovect; // vector of rasters belonging to the same stac catalog feature TimeLib::gmt_time_t gmtDate; // feature date (can be computed from start/end dates) int64_t gpsTime; // feature gps time - RaserGroup(void): gmtDate{0,0,0,0,0,0}, gpsTime(0) {} + RaserGroup(void): featureId(NULL), gmtDate{0,0,0,0,0,0}, gpsTime(0) {} + ~RaserGroup(void) { delete[] featureId; } } rasters_group_t; /* Raster and associated points to sample, used by batch sampling */ typedef struct UniqueRaster { bool dataIsElevation; - const std::string& fileName; - uint64_t fileId; // fileDictionary id + uint64_t fileId; // file dictionary id std::vector pointSamples; // vector of samples for each point in this raster - explicit UniqueRaster(bool _dataIsElevation, const std::string& _fileName): - dataIsElevation(_dataIsElevation), fileName(_fileName), fileId(0) {} + explicit UniqueRaster(bool _dataIsElevation, uint64_t _fileId): + dataIsElevation(_dataIsElevation), fileId(_fileId) {} } unique_raster_t; typedef Ordering GroupOrdering; @@ -184,6 +184,7 @@ class GeoIndexedRaster: public RasterObject const std::vector* points; std::vector pointsGroups; raster_points_map_t rasterToPointsMap; + RasterFileDictionary threadFileDict; explicit GroupsFinder (GeoIndexedRaster* _obj, const std::vector* _points); } groups_finder_t; @@ -193,7 +194,8 @@ class GeoIndexedRaster: public RasterObject const OGRGeometry* geo; const std::vector* featuresList; // features to test for intersection with geo std::vector rasterGroups; // result raster groups which intersect with geo - explicit RasterFinder(const OGRGeometry* geo, const std::vector* _featuresList); + RasterFileDictionary& fileDict; + explicit RasterFinder(const OGRGeometry* geo, const std::vector* _featuresList, RasterFileDictionary& _fileDict); } raster_finder_t; /*-------------------------------------------------------------------- @@ -309,7 +311,7 @@ class GeoIndexedRaster: public RasterObject bool createBatchReaderThreads(uint32_t rasters2sample); bool updateCache (uint32_t& rasters2sample, const GroupOrdering* groupList); - bool filterRasters (int64_t gps, GroupOrdering* groupList); + bool filterRasters (int64_t gps, GroupOrdering* groupList, RasterFileDictionary& dict); static OGRGeometry* getConvexHull (const std::vector* points); void applySpatialFilter (OGRLayer* layer, const std::vector* points); diff --git a/packages/geo/GeoRaster.cpp b/packages/geo/GeoRaster.cpp index 5a8449bcf..23ac319c1 100644 --- a/packages/geo/GeoRaster.cpp +++ b/packages/geo/GeoRaster.cpp @@ -58,7 +58,7 @@ void GeoRaster::deinit (void) *----------------------------------------------------------------------------*/ GeoRaster::GeoRaster(lua_State *L, RequestFields* rqst_parms, const char* key, const std::string& _fileName, double _gpsTime, bool dataIsElevation, GdalRaster::overrideCRS_t cb): RasterObject(L, rqst_parms, key), - raster(parms, _fileName, _gpsTime, fileDictAdd(_fileName), dataIsElevation, cb) + raster(parms, _fileName, _gpsTime, fileDict.add(_fileName, true), dataIsElevation, cb) { /* Add Lua Functions */ LuaEngine::setAttrFunc(L, "dim", luaDimensions); diff --git a/packages/geo/RasterFileDictionary.cpp b/packages/geo/RasterFileDictionary.cpp new file mode 100644 index 000000000..4f0aa4c7e --- /dev/null +++ b/packages/geo/RasterFileDictionary.cpp @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2021, University of Washington + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the University of Washington nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF WASHINGTON AND CONTRIBUTORS + * “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF WASHINGTON OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/****************************************************************************** + * INCLUDES + ******************************************************************************/ + +#include "RasterFileDictionary.h" +#include "EventLib.h" + + +/****************************************************************************** + * STATIC DATA + ******************************************************************************/ + +Mutex RasterFileDictionary::mutex; + +/****************************************************************************** + * PUBLIC METHODS + ******************************************************************************/ + +/* + * NOTE: only add() method is thread safe + */ + +/*---------------------------------------------------------------------------- + * add + *----------------------------------------------------------------------------*/ +uint64_t RasterFileDictionary::add(const string& fileName, bool sample) +{ + uint64_t id; + mutex.lock(); + { + if(!fileDict.find(fileName.c_str(), &id)) + { + id = keySpace | fileVector.size(); + fileDict.add(fileName.c_str(), id); + fileVector.push_back(fileName); + } + + if(sample) + { + sampleIdSet.insert(id); + } + } + mutex.unlock(); + return id; +} + +/*---------------------------------------------------------------------------- + * get + *----------------------------------------------------------------------------*/ +const char* RasterFileDictionary::get(uint64_t fileId) +{ + const char* fileName = ""; + + /* Mask upper 32 bits to get index into vector */ + const uint32_t index = static_cast(fileId & 0xFFFFFFFF); + if(index < fileVector.size()) + { + fileName = fileVector[index].c_str(); + } + return fileName; +} + +/*---------------------------------------------------------------------------- + * setSample + *----------------------------------------------------------------------------*/ +void RasterFileDictionary::setSample(uint64_t sampleFileId) +{ + /* Make sure sampleFileId is valid */ + const char* fileName = get(sampleFileId); + if(fileName != NULL) + { + sampleIdSet.insert(sampleFileId); + } + else + { + mlog(ERROR, "Invalid sampleFileId: %lu", sampleFileId); + } +} + +/*---------------------------------------------------------------------------- + * clear + *----------------------------------------------------------------------------*/ +void RasterFileDictionary::clear(void) +{ + fileDict.clear(); + fileVector.clear(); + sampleIdSet.clear(); +} + +/*---------------------------------------------------------------------------- + * getSampleIds + *----------------------------------------------------------------------------*/ +const std::set& RasterFileDictionary::getSampleIds(void) const +{ + return sampleIdSet; +} + +/*---------------------------------------------------------------------------- + * copy + *----------------------------------------------------------------------------*/ +RasterFileDictionary RasterFileDictionary::copy(void) +{ + /* Create a new instance of RasterFileDictionary */ + RasterFileDictionary copyDict(keySpace >> 32); + + /* Copy fileDict */ + Dictionary::Iterator iter = Dictionary::Iterator(fileDict); + for (int i = 0; i < iter.length; ++i) + { + /* Add each key-value pair from the original to the copy */ + copyDict.fileDict.add(iter[i].key, iter[i].value); + } + + /* Copy the fileVector */ + copyDict.fileVector = fileVector; + + /* Copy the sampleIdSet */ + copyDict.sampleIdSet = sampleIdSet; + + return copyDict; +} diff --git a/packages/geo/RasterFileDictionary.h b/packages/geo/RasterFileDictionary.h new file mode 100644 index 000000000..bc6c51c55 --- /dev/null +++ b/packages/geo/RasterFileDictionary.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2021, University of Washington + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the University of Washington nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF WASHINGTON AND CONTRIBUTORS + * “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF WASHINGTON OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __raster_file_dictionary__ +#define __raster_file_dictionary__ + +/****************************************************************************** + * INCLUDES + ******************************************************************************/ + +#include "OsApi.h" +#include "Dictionary.h" +#include + +/****************************************************************************** + * RASTER FIlE DICTIONARY CLASS + ******************************************************************************/ + +class RasterFileDictionary +{ + public: + + /*-------------------------------------------------------------------- + * Constants + *--------------------------------------------------------------------*/ + + /*-------------------------------------------------------------------- + * Typedefs + *--------------------------------------------------------------------*/ + + + explicit RasterFileDictionary(uint64_t _keySpace=0): keySpace(_keySpace<<32) {} + ~RasterFileDictionary(void) = default; + + uint64_t add (const std::string& fileName, bool sample=false); + const char* get (uint64_t fileId); + void setSample(uint64_t sampleFileId); + void clear (void); + + const std::set& getSampleIds(void) const; + RasterFileDictionary copy(void); + + private: + + /*-------------------------------------------------------------------- + * Methods + *--------------------------------------------------------------------*/ + + + /*-------------------------------------------------------------------- + * Data + *--------------------------------------------------------------------*/ + + Dictionary fileDict; // Dictionary to store raster file names + std::vector fileVector; // Vector to store raster file names by id (index derived from lower 32 bits of fileDict value) + std::set sampleIdSet; // Set to store raster fileIds used only in returned RasterSamples + uint64_t keySpace; // Key space + static Mutex mutex; // Mutex for thread safety, only add() method is thread safe +}; + +#endif /* __raster_file_dictionary__ */ diff --git a/packages/geo/RasterObject.cpp b/packages/geo/RasterObject.cpp index b790a33fa..be066279d 100644 --- a/packages/geo/RasterObject.cpp +++ b/packages/geo/RasterObject.cpp @@ -58,7 +58,6 @@ const struct luaL_Reg RasterObject::LUA_META_TABLE[] = { Mutex RasterObject::factoryMut; Dictionary RasterObject::factories; -Mutex RasterObject::fileDictMut; /****************************************************************************** * PUBLIC METHODS @@ -120,55 +119,6 @@ int RasterObject::luaCreate( lua_State* L ) } } -/*---------------------------------------------------------------------------- - * cppCreate - *----------------------------------------------------------------------------*/ -RasterObject* RasterObject::cppCreate(RequestFields* rqst_parms, const char* key) -{ - /* Check Parameters */ - if(!rqst_parms) return NULL; - const GeoFields* geo_fields = &rqst_parms->samplers[key]; - - /* Get Factory */ - factory_t factory; - bool found = false; - - factoryMut.lock(); - { - found = factories.find(geo_fields->asset.getName(), &factory); - } - factoryMut.unlock(); - - /* Check Factory */ - if(!found) - { - mlog(CRITICAL, "Failed to find registered raster for %s", geo_fields->asset.getName()); - return NULL; - } - - /* Create Raster */ - RasterObject* _raster = factory.create(NULL, rqst_parms, key); - if(!_raster) - { - mlog(CRITICAL, "Failed to create raster for %s", geo_fields->asset.getName()); - return NULL; - } - - /* Bump Lua Reference (for releasing in destructor) */ - referenceLuaObject(rqst_parms); - - /* Return Raster */ - return _raster; -} - -/*---------------------------------------------------------------------------- - * cppCreate - *----------------------------------------------------------------------------*/ -RasterObject* RasterObject::cppCreate(const RasterObject* obj) -{ - return cppCreate(obj->rqstParms, obj->samplerKey); -} - /*---------------------------------------------------------------------------- * registerRaster *----------------------------------------------------------------------------*/ @@ -198,7 +148,7 @@ uint32_t RasterObject::getSamples(const std::vector& points, List< try { /* Get maximum number of batch processing threads allowed */ - const uint32_t maxNumThreads = getMaxBatchThreads(); + const uint32_t maxNumThreads = std::min(std::thread::hardware_concurrency(), static_cast(16)); /* Get readers ranges */ std::vector ranges; @@ -268,13 +218,10 @@ uint32_t RasterObject::getSamples(const std::vector& points, List< RasterSample* sample = slist->get(i); /* Find the file name for the sample id in reader's dictionary */ - const char* name = reader->robj->fileDictGetFile(sample->fileId); + const char* name = reader->robj->fileDict.get(sample->fileId); /* Use user's RasterObject dictionary to store the file names. */ - const uint64_t id = fileDictAdd(name); - - /* Update the sample file id */ - sample->fileId = id; + sample->fileId = fileDict.add(name, true); } sllist.add(slist); @@ -315,18 +262,6 @@ uint8_t* RasterObject::getPixels(uint32_t ulx, uint32_t uly, uint32_t xsize, uin return NULL; } -/*---------------------------------------------------------------------------- - * getMaxBatchThreads - *----------------------------------------------------------------------------*/ -uint32_t RasterObject::getMaxBatchThreads(void) -{ - /* Maximum number of batch threads. - * Each batch thread may create multiple raster reading threads. - */ - const uint32_t maxThreads = 16; - return std::min(std::thread::hardware_concurrency(), maxThreads); -} - /*---------------------------------------------------------------------------- * Destructor *----------------------------------------------------------------------------*/ @@ -339,6 +274,9 @@ RasterObject::~RasterObject(void) delete [] samplerKey; } +/*---------------------------------------------------------------------------- + * stopSampling + *----------------------------------------------------------------------------*/ void RasterObject::stopSampling(void) { samplingEnabled = false; @@ -350,99 +288,6 @@ void RasterObject::stopSampling(void) readersMut.unlock(); } -/*---------------------------------------------------------------------------- - * fileDictAdd - *----------------------------------------------------------------------------*/ -uint64_t RasterObject::fileDictAdd(const string& fileName) -{ - uint64_t id; - - fileDictMut.lock(); - { - if(!fileDict.find(fileName.c_str(), &id)) - { - id = (rqstParms->keySpace.value << 32) | fileDict.length(); - fileDict.add(fileName.c_str(), id); - } - } - fileDictMut.unlock(); - return id; -} - -/*---------------------------------------------------------------------------- - * fileDictGetFile - *----------------------------------------------------------------------------*/ -const char* RasterObject::fileDictGetFile (uint64_t fileId) -{ - const char* fileName = NULL; - fileDictMut.lock(); - { - Dictionary::Iterator iterator(fileDict); - for(int i = 0; i < iterator.length; i++) - { - if(fileId == iterator[i].value) - { - fileName = iterator[i].key; - break; - } - } - } - fileDictMut.unlock(); - return fileName; -} - -/*---------------------------------------------------------------------------- - * fileDictClear - *----------------------------------------------------------------------------*/ -void RasterObject::fileDictClear (void) -{ - fileDictMut.lock(); - { - fileDict.clear(); - } - fileDictMut.unlock(); -} - -/*---------------------------------------------------------------------------- - * getThreadsRanges - *----------------------------------------------------------------------------*/ -void RasterObject::getThreadsRanges(std::vector& ranges, uint32_t num, - uint32_t minPerThread, uint32_t maxNumThreads) -{ - ranges.clear(); - - /* Determine how many threads to use */ - if(num <= minPerThread) - { - ranges.emplace_back(range_t{0, num}); - return; - } - - uint32_t numThreads = std::min(maxNumThreads, num / minPerThread); - - /* Ensure at least two threads if num > minPerThread */ - if(numThreads == 1 && maxNumThreads > 1) - { - numThreads = 2; - } - - const uint32_t pointsPerThread = num / numThreads; - uint32_t remainingPoints = num % numThreads; - - uint32_t start = 0; - for(uint32_t i = 0; i < numThreads; i++) - { - const uint32_t end = start + pointsPerThread + (remainingPoints > 0 ? 1 : 0); - ranges.emplace_back(range_t{start, end}); - - start = end; - if(remainingPoints > 0) - { - remainingPoints--; - } - } -} - /****************************************************************************** * PROTECTED METHODS ******************************************************************************/ @@ -455,6 +300,7 @@ RasterObject::RasterObject(lua_State *L, RequestFields* rqst_parms, const char* rqstParms(rqst_parms), parms(&rqstParms->samplers[key]), samplerKey(StringLib::duplicate(key)), + fileDict(rqstParms->keySpace.value), samplingEnabled(true) { /* Add Lua Functions */ @@ -518,18 +364,7 @@ int RasterObject::luaSamples(lua_State *L) for(int i = 0; i < slist.length(); i++) { const RasterSample* sample = slist[i]; - const char* fileName = ""; - - /* Find fileName from fileId */ - Dictionary::Iterator iterator(lua_obj->fileDictGet()); - for(int j = 0; j < iterator.length; j++) - { - if(iterator[j].value == sample->fileId) - { - fileName = iterator[j].key; - break; - } - } + const char* fileName = lua_obj->fileDict.get(sample->fileId); lua_createtable(L, 0, 4); LuaEngine::setAttrStr(L, "file", fileName); @@ -613,7 +448,107 @@ int RasterObject::luaSubsets(lua_State *L) return num_ret; } +/*---------------------------------------------------------------------------- + * cppCreate + *----------------------------------------------------------------------------*/ +RasterObject* RasterObject::cppCreate(RequestFields* rqst_parms, const char* key) +{ + /* Check Parameters */ + if(!rqst_parms) return NULL; + const GeoFields* geo_fields = &rqst_parms->samplers[key]; + + /* Get Factory */ + factory_t factory; + bool found = false; + + factoryMut.lock(); + { + found = factories.find(geo_fields->asset.getName(), &factory); + } + factoryMut.unlock(); + + /* Check Factory */ + if(!found) + { + mlog(CRITICAL, "Failed to find registered raster for %s", geo_fields->asset.getName()); + return NULL; + } + + /* Create Raster */ + RasterObject* _raster = factory.create(NULL, rqst_parms, key); + if(!_raster) + { + mlog(CRITICAL, "Failed to create raster for %s", geo_fields->asset.getName()); + return NULL; + } + + /* Bump Lua Reference (for releasing in destructor) */ + referenceLuaObject(rqst_parms); + + /* Return Raster */ + return _raster; +} + +/*---------------------------------------------------------------------------- + * cppCreate + *----------------------------------------------------------------------------*/ +RasterObject* RasterObject::cppCreate(const RasterObject* obj) +{ + return cppCreate(obj->rqstParms, obj->samplerKey); +} + +/*---------------------------------------------------------------------------- + * getThreadsRanges + *----------------------------------------------------------------------------*/ +void RasterObject::getThreadsRanges(std::vector& ranges, uint32_t num, + uint32_t minPerThread, uint32_t maxNumThreads) +{ + ranges.clear(); + + /* Determine how many threads to use */ + if(num <= minPerThread) + { + ranges.emplace_back(range_t{0, num}); + return; + } + + uint32_t numThreads = std::min(maxNumThreads, num / minPerThread); + + /* Ensure at least two threads if num > minPerThread */ + if(numThreads == 1 && maxNumThreads > 1) + { + numThreads = 2; + } + + const uint32_t pointsPerThread = num / numThreads; + uint32_t remainingPoints = num % numThreads; + + uint32_t start = 0; + for(uint32_t i = 0; i < numThreads; i++) + { + const uint32_t end = start + pointsPerThread + (remainingPoints > 0 ? 1 : 0); + ranges.emplace_back(range_t{start, end}); + + start = end; + if(remainingPoints > 0) + { + remainingPoints--; + } + } +} + +/*---------------------------------------------------------------------------- + * fileDictSetSamples + *----------------------------------------------------------------------------*/ +void RasterObject::fileDictSetSamples(List* slist) +{ + for(int i = 0; i < slist->length(); i++) + { + const RasterSample *sample = slist->get(i); + fileDict.setSample(sample->fileId); + } +} /****************************************************************************** * PRIVATE METHODS diff --git a/packages/geo/RasterObject.h b/packages/geo/RasterObject.h index 4b5580830..7e8e03c58 100644 --- a/packages/geo/RasterObject.h +++ b/packages/geo/RasterObject.h @@ -43,6 +43,7 @@ #include "GeoFields.h" #include "RasterSample.h" #include "RasterSubset.h" +#include "RasterFileDictionary.h" /****************************************************************************** * RASTER OBJECT CLASS @@ -102,14 +103,11 @@ class RasterObject: public LuaObject static void init (void); static void deinit (void); static int luaCreate (lua_State* L); - static RasterObject* cppCreate (RequestFields* rqst_parms, const char* key); - static RasterObject* cppCreate (const RasterObject* obj); static bool registerRaster (const char* _name, factory_f create); virtual uint32_t getSamples (const MathLib::point_3d_t& point, int64_t gps, List& slist, void* param=NULL) = 0; virtual uint32_t getSamples (const std::vector& points, List& sllist, void* param=NULL); virtual uint32_t getSubsets (const MathLib::extent_t& extent, int64_t gps, List& slist, void* param=NULL) = 0; virtual uint8_t* getPixels (uint32_t ulx, uint32_t uly, uint32_t xsize=0, uint32_t ysize=0, void* param=NULL); - virtual uint32_t getMaxBatchThreads(void); ~RasterObject (void) override; bool hasZonalStats (void) const @@ -127,40 +125,59 @@ class RasterObject: public LuaObject return parms->use_poi_time; } - const Dictionary& fileDictGet(void) + void stopSampling(void); + + const char* fileDictGet(uint64_t fileId) { - return fileDict; + return fileDict.get(fileId); } - void lockSampling(void) + const std::set& fileDictGetSampleIds(void) { - samplingMut.lock(); + return fileDict.getSampleIds(); } - void unlockSampling(void) + RasterFileDictionary fileDictCopy(void) { - samplingMut.unlock(); + return fileDict.copy(); } - void stopSampling (void); - bool sampling (void) {return samplingEnabled;}; - uint64_t fileDictAdd (const std::string& fileName); - const char* fileDictGetFile (uint64_t fileId); - void fileDictClear (void); - static void getThreadsRanges(std::vector& ranges, uint32_t num, - uint32_t minPerThread, uint32_t maxNumThreads); - protected: /*-------------------------------------------------------------------- * Methods *--------------------------------------------------------------------*/ + void lockSampling(void) + { + samplingMut.lock(); + } + + void unlockSampling(void) + { + samplingMut.unlock(); + } + + bool sampling(void) + { + return samplingEnabled; + }; + + RasterObject (lua_State* L, RequestFields* rqst_parms, const char* key); static int luaSamples (lua_State* L); static int luaSubsets (lua_State* L); static int luaPixels (lua_State *L); + static RasterObject* cppCreate(RequestFields* rqst_parms, const char* key); + static RasterObject* cppCreate(const RasterObject* obj); + + static void getThreadsRanges(std::vector& ranges, uint32_t num, + uint32_t minPerThread, uint32_t maxNumThreads); + + void fileDictSetSamples(List* slist); + + /*-------------------------------------------------------------------- * Data *--------------------------------------------------------------------*/ @@ -168,6 +185,7 @@ class RasterObject: public LuaObject RequestFields* rqstParms; const GeoFields* parms; const char* samplerKey; + RasterFileDictionary fileDict; private: @@ -181,15 +199,12 @@ class RasterObject: public LuaObject static uint32_t readSamples (RasterObject* robj, const range_t& range, const std::vector& points, std::vector& samples); - /*-------------------------------------------------------------------- * Data *--------------------------------------------------------------------*/ static Mutex factoryMut; static Dictionary factories; - static Mutex fileDictMut; - Dictionary fileDict; Mutex readersMut; Mutex samplingMut; diff --git a/packages/geo/RasterSampler.cpp b/packages/geo/RasterSampler.cpp index 93b74add1..effca4f7a 100644 --- a/packages/geo/RasterSampler.cpp +++ b/packages/geo/RasterSampler.cpp @@ -355,17 +355,19 @@ bool RasterSampler::processTimeout (void) *----------------------------------------------------------------------------*/ bool RasterSampler::processTermination (void) { - Dictionary::Iterator iterator(raster->fileDictGet()); - for(int i = 0; i < iterator.length; i++) + const std::set& sampleIds = raster->fileDictGetSampleIds(); + for(std::set::const_iterator it = sampleIds.begin(); it != sampleIds.end(); it++) { /* Send File Directory Entry Record for each File in Raster Dictionary */ - const int file_name_len = StringLib::size(iterator[i].key) + 1; + const uint64_t fileId = *it; + const int file_name_len = StringLib::size(raster->fileDictGet(fileId)) + 1; const int size = offsetof(file_directory_entry_t, file_name) + file_name_len; RecordObject record(fileIdRecType, size); file_directory_entry_t* entry = reinterpret_cast(record.getRecordData()); - entry->file_id = iterator[i].value; - StringLib::copy(entry->file_name, iterator[i].key, file_name_len); + entry->file_id = fileId; + StringLib::copy(entry->file_name, raster->fileDictGet(fileId), file_name_len); record.post(outQ); } + return true; } diff --git a/packages/geo/UT_RasterSample.cpp b/packages/geo/UT_RasterSample.cpp index 6ce864728..2b393e055 100644 --- a/packages/geo/UT_RasterSample.cpp +++ b/packages/geo/UT_RasterSample.cpp @@ -124,6 +124,70 @@ bool UT_RasterSample::ReadPointsFile(std::vector& po return true; } +/*---------------------------------------------------------------------------- + * TestFileDictionary + *----------------------------------------------------------------------------*/ +bool UT_RasterSample::TestFileDictionary(RasterObject* raster) +{ + /* Make a copy so we don't change the original dictionary */ + RasterFileDictionary dict = raster->fileDictCopy(); + + /* Clear the dictionary but keep keySpace */ + dict.clear(); + + const char* raster1 = "RasterOneNotSample"; + const char* raster2 = "RasterTwoSample"; + const uint64_t fileIdRaster1 = dict.add(raster1); + const uint64_t fileIdRaster2 = dict.add(raster2, true); + + const std::set& sampleIds = dict.getSampleIds(); + uint32_t cnt = 0; + const char* raserName = NULL; + uint64_t fileId; + for (std::set::const_iterator it = sampleIds.begin(); it != sampleIds.end(); it++) + { + fileId = *it; + raserName = dict.get(fileId); + cnt++; + } + + if(cnt != 1) + { + mlog(CRITICAL, "Expected 1 sample but got %d", cnt); + return false; + } + + if(strcmp(raserName, raster2) != 0) + { + mlog(CRITICAL, "Expected %s but got %s", raster2, raserName); + return false; + } + + const uint64_t keySpace1 = fileIdRaster1 >> 32; + const uint64_t keySpace2 = fileIdRaster2 >> 32; + + if(keySpace1 != keySpace2) + { + mlog(CRITICAL, "Expected key space %lu but got %lu", keySpace1, keySpace2); + return false; + } + + dict.setSample(fileIdRaster1); // Set raster1 as sample + cnt = 0; + for (std::set::const_iterator it = sampleIds.begin(); it != sampleIds.end(); it++) + { + cnt++; + } + + if(cnt != 2) + { + mlog(CRITICAL, "Expected 2 sample but got %d", cnt); + return false; + } + + return true; +} + /*---------------------------------------------------------------------------- * luaSampleTest *----------------------------------------------------------------------------*/ @@ -152,6 +216,12 @@ int UT_RasterSample::luaSampleTest(lua_State* L) /* Get Points File if provided */ const char* pointsFile = getLuaString(L, 7, true, NULL); + /* Test fileDictionary */ + if(!lua_obj->TestFileDictionary(lua_obj->raster)) + { + return returnLuaStatus(L, false); + } + std::vector points2sample; if(pointsFile) @@ -211,6 +281,9 @@ int UT_RasterSample::luaSampleTest(lua_State* L) } const double serialStopTime = TimeLib::latchtime(); + /* Store fileDictionary from serial sampling, batch sampling will overwrite it */ + RasterFileDictionary serialDict = lua_obj->raster->fileDictCopy(); + /* Get samples using batch method */ print2term("Getting samples for %zu points using batch method\n", points2sample.size()); List batch_sllist; @@ -283,8 +356,8 @@ int UT_RasterSample::luaSampleTest(lua_State* L) RasterSample* serial = (*serial_slist)[j]; RasterSample* batch = (*batch_slist)[j]; - const char* serialName = lua_obj->raster->fileDictGetFile(serial->fileId); - const char* batchName = lua_obj->raster->fileDictGetFile(batch->fileId); + const char* serialName = serialDict.get(serial->fileId); + const char* batchName = lua_obj->raster->fileDictGet(batch->fileId); if (strcmp(serialName, batchName) != 0) { diff --git a/packages/geo/UT_RasterSample.h b/packages/geo/UT_RasterSample.h index 1ace8a3c1..2d85646e1 100644 --- a/packages/geo/UT_RasterSample.h +++ b/packages/geo/UT_RasterSample.h @@ -74,6 +74,7 @@ class UT_RasterSample: public LuaObject ~UT_RasterSample (void) override; static bool ReadPointsFile(std::vector& points, const char* filePath); + static bool TestFileDictionary(RasterObject* raster); static int luaSampleTest (lua_State* L); /*-------------------------------------------------------------------- diff --git a/packages/geo/UT_RasterSubset.cpp b/packages/geo/UT_RasterSubset.cpp index f6ae263a6..1daa51377 100644 --- a/packages/geo/UT_RasterSubset.cpp +++ b/packages/geo/UT_RasterSubset.cpp @@ -244,6 +244,6 @@ int UT_RasterSubset::luaSubsetTest(lua_State* L) *----------------------------------------------------------------------------*/ const char* UT_RasterSubset::getRasterName(RasterObject* robj, uint64_t fileId) { - const char* fileName = robj->fileDictGetFile(fileId); + const char* fileName = robj->fileDictGet(fileId); return StringLib::duplicate(fileName); } \ No newline at end of file diff --git a/scripts/systests/parquet_sampler_arcticdem_perf_test.lua b/scripts/systests/parquet_sampler_arcticdem_perf_test.lua index 627cdc4f1..1e12ad669 100644 --- a/scripts/systests/parquet_sampler_arcticdem_perf_test.lua +++ b/scripts/systests/parquet_sampler_arcticdem_perf_test.lua @@ -11,9 +11,9 @@ local td = runner.rootdir(arg[0]) local outq_name = "outq-luatest" -local in_parquet = '/data/arcticdem/alaska62k.parquet' +-- local in_parquet = '/data/arcticdem/alaska62k.parquet' -- local in_parquet = '/data/arcticdem/alaska962k.parquet' --- local in_parquet = '/data/arcticdem/alaska_2milionpoints.parquet' +local in_parquet = '/data/arcticdem/alaska_2milionpoints.parquet' -- Indicates local file system (no s3 or client) local prefix = "file://" diff --git a/scripts/systests/parquet_sampler_landsat_perf_test.lua b/scripts/systests/parquet_sampler_landsat_perf_test.lua index db7fa31e5..aefbdc82c 100644 --- a/scripts/systests/parquet_sampler_landsat_perf_test.lua +++ b/scripts/systests/parquet_sampler_landsat_perf_test.lua @@ -8,8 +8,8 @@ local _,td = runner.srcscript() -- Setup -- local assets = asset.loaddir() -console.monitor:config(core.LOG, core.DEBUG) -sys.setlvl(core.LOG, core.DEBUG) +-- console.monitor:config(core.LOG, core.DEBUG) +-- sys.setlvl(core.LOG, core.DEBUG) local script_parms = {earthdata="https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials", identity="lpdaac-cloud"} local earthdata_auth_script = core.script("earth_data_auth", json.encode(script_parms)) @@ -22,9 +22,9 @@ local outq_name = "outq-luatest" -- Use the same input parquet files as 3dep -local in_parquet = '/data/3dep/wrzesien_snow_64k.parquet' +-- local in_parquet = '/data/3dep/wrzesien_snow_64k.parquet' -- local in_parquet = '/data/3dep/wrzesien_snow_525k.parquet' --- local in_parquet = '/data/3dep/wrzesien_snow_2618k.parquet' +local in_parquet = '/data/3dep/wrzesien_snow_2618k.parquet' -- Indicates local file system (no s3 or client)