-
-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Add gdal vector sort #13351
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Add gdal vector sort #13351
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,356 @@ | ||||||
| /****************************************************************************** | ||||||
| * | ||||||
| * Project: GDAL | ||||||
| * Purpose: "gdal vector sort" subcommand | ||||||
| * Author: Daniel Baston | ||||||
| * | ||||||
| ****************************************************************************** | ||||||
| * Copyright (c) 2025, ISciences LLC | ||||||
| * | ||||||
| * SPDX-License-Identifier: MIT | ||||||
| ****************************************************************************/ | ||||||
|
|
||||||
| #include "gdalalg_vector_sort.h" | ||||||
|
|
||||||
| #include "cpl_error.h" | ||||||
| #include "gdal_priv.h" | ||||||
| #include "gdalalg_vector_geom.h" | ||||||
| #include "ogr_geometry.h" | ||||||
|
|
||||||
| #include "ogr_geos.h" | ||||||
|
|
||||||
| #include <cinttypes> | ||||||
|
|
||||||
| #ifndef _ | ||||||
| #define _(x) (x) | ||||||
| #endif | ||||||
|
|
||||||
| //! @cond Doxygen_Suppress | ||||||
|
|
||||||
| GDALVectorSortAlgorithm::GDALVectorSortAlgorithm(bool standaloneStep) | ||||||
| : GDALVectorPipelineStepAlgorithm(NAME, DESCRIPTION, HELP_URL, | ||||||
| standaloneStep) | ||||||
| { | ||||||
| AddArg("method", 0, _("Geometry sorting algorithm"), &m_sortMethod) | ||||||
| .SetChoices("hilbert", "strtree") | ||||||
| .SetDefault(m_sortMethod); | ||||||
| } | ||||||
|
|
||||||
| constexpr uint32_t CPL_HILBERT_MAX = (1 << 16) - 1; | ||||||
|
|
||||||
| std::uint32_t CPLHilbertCode(std::uint32_t x, std::uint32_t y) | ||||||
| { | ||||||
| // Based on public domain code at | ||||||
| // https://github.com/rawrunprotected/hilbert_curves | ||||||
|
|
||||||
| uint32_t a = x ^ y; | ||||||
| uint32_t b = 0xFFFF ^ a; | ||||||
| uint32_t c = 0xFFFF ^ (x | y); | ||||||
| uint32_t d = x & (y ^ 0xFFFF); | ||||||
|
|
||||||
| uint32_t A = a | (b >> 1); | ||||||
| uint32_t B = (a >> 1) ^ a; | ||||||
| uint32_t C = ((c >> 1) ^ (b & (d >> 1))) ^ c; | ||||||
| uint32_t D = ((a & (c >> 1)) ^ (d >> 1)) ^ d; | ||||||
|
|
||||||
| a = A; | ||||||
| b = B; | ||||||
| c = C; | ||||||
| d = D; | ||||||
| A = ((a & (a >> 2)) ^ (b & (b >> 2))); | ||||||
| B = ((a & (b >> 2)) ^ (b & ((a ^ b) >> 2))); | ||||||
| C ^= ((a & (c >> 2)) ^ (b & (d >> 2))); | ||||||
| D ^= ((b & (c >> 2)) ^ ((a ^ b) & (d >> 2))); | ||||||
|
|
||||||
| a = A; | ||||||
| b = B; | ||||||
| c = C; | ||||||
| d = D; | ||||||
| A = ((a & (a >> 4)) ^ (b & (b >> 4))); | ||||||
| B = ((a & (b >> 4)) ^ (b & ((a ^ b) >> 4))); | ||||||
| C ^= ((a & (c >> 4)) ^ (b & (d >> 4))); | ||||||
| D ^= ((b & (c >> 4)) ^ ((a ^ b) & (d >> 4))); | ||||||
|
|
||||||
| a = A; | ||||||
| b = B; | ||||||
| c = C; | ||||||
| d = D; | ||||||
| C ^= ((a & (c >> 8)) ^ (b & (d >> 8))); | ||||||
| D ^= ((b & (c >> 8)) ^ ((a ^ b) & (d >> 8))); | ||||||
|
|
||||||
| a = C ^ (C >> 1); | ||||||
| b = D ^ (D >> 1); | ||||||
|
|
||||||
| uint32_t i0 = x ^ y; | ||||||
| uint32_t i1 = b | (0xFFFF ^ (i0 | a)); | ||||||
|
|
||||||
| i0 = (i0 | (i0 << 8)) & 0x00FF00FF; | ||||||
| i0 = (i0 | (i0 << 4)) & 0x0F0F0F0F; | ||||||
| i0 = (i0 | (i0 << 2)) & 0x33333333; | ||||||
| i0 = (i0 | (i0 << 1)) & 0x55555555; | ||||||
|
|
||||||
| i1 = (i1 | (i1 << 8)) & 0x00FF00FF; | ||||||
| i1 = (i1 | (i1 << 4)) & 0x0F0F0F0F; | ||||||
| i1 = (i1 | (i1 << 2)) & 0x33333333; | ||||||
| i1 = (i1 | (i1 << 1)) & 0x55555555; | ||||||
|
|
||||||
| uint32_t value = ((i1 << 1) | i0); | ||||||
|
|
||||||
| return value; | ||||||
| } | ||||||
|
|
||||||
| std::uint32_t CPLHilbertCode(const OGREnvelope &oDomain, double dfX, double dfY) | ||||||
| { | ||||||
| uint32_t x = 0; | ||||||
| uint32_t y = 0; | ||||||
| if (oDomain.Width() != 0.0) | ||||||
| x = static_cast<uint32_t>( | ||||||
| floor(CPL_HILBERT_MAX * (dfX - oDomain.MinX) / oDomain.Width())); | ||||||
| if (oDomain.Height() != 0.0) | ||||||
| y = static_cast<uint32_t>( | ||||||
| floor(CPL_HILBERT_MAX * (dfY - oDomain.MinY) / oDomain.Height())); | ||||||
| return CPLHilbertCode(x, y); | ||||||
| } | ||||||
|
|
||||||
| namespace | ||||||
| { | ||||||
|
|
||||||
| bool CreateDstFeatures( | ||||||
| const std::vector<std::unique_ptr<OGRFeature>> &srcFeatures, | ||||||
| const std::vector<size_t> &sortedIndices, OGRLayer &dstLayer) | ||||||
| { | ||||||
| for (size_t iSrcFeature : sortedIndices) | ||||||
| { | ||||||
| OGRFeature *poSrcFeature = srcFeatures[iSrcFeature].get(); | ||||||
|
|
||||||
| // Tried just using poSrcFeature->SetFDefnUnsafe and plugging | ||||||
| // directly into CreateFeature, but this resulted in the order | ||||||
| // of the input features being preserved. | ||||||
|
|
||||||
| OGRFeature f(dstLayer.GetLayerDefn()); | ||||||
| f.SetFrom(poSrcFeature); | ||||||
| if (dstLayer.CreateFeature(&f) != OGRERR_NONE) | ||||||
| { | ||||||
| return false; | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| return true; | ||||||
| } | ||||||
|
|
||||||
| class GDALVectorHilbertSortDataset | ||||||
| : public GDALVectorNonStreamingAlgorithmDataset | ||||||
| { | ||||||
| public: | ||||||
| using GDALVectorNonStreamingAlgorithmDataset:: | ||||||
| GDALVectorNonStreamingAlgorithmDataset; | ||||||
|
|
||||||
| bool Process(OGRLayer &srcLayer, OGRLayer &dstLayer) override | ||||||
| { | ||||||
| std::vector<std::unique_ptr<OGRFeature>> features; | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it would be great if we had a way of dealing with layers that don't fit in RAM (but where the hilbertCodes vector would still fit). That would likely involve using a temporary storage like I did in the Parquet driver where when with the SORT_BY_BBOX layer creation option is used, a temporary GeoPackage file is created., for source layers that don't have efficient GetFeature() implementation. |
||||||
|
|
||||||
| for (auto &feature : srcLayer) | ||||||
| { | ||||||
| features.emplace_back(feature.release()); | ||||||
| } | ||||||
| OGREnvelope oLayerExtent; | ||||||
|
|
||||||
| std::vector<OGREnvelope> envelopes(features.size()); | ||||||
| for (size_t i = 0; i < features.size(); i++) | ||||||
| { | ||||||
| // TODO GeomFieldRef | ||||||
| const OGRFeature *poFeature = features[i].get(); | ||||||
| const OGRGeometry *poGeom = poFeature->GetGeometryRef(); | ||||||
|
|
||||||
| if (poGeom != nullptr) | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| { | ||||||
| poGeom->getEnvelope(&envelopes[i]); | ||||||
| oLayerExtent.Merge(envelopes[i]); | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| std::vector<std::pair<std::size_t, std::uint32_t>> hilbertCodes( | ||||||
| features.size()); | ||||||
| for (std::size_t i = 0; i < features.size(); i++) | ||||||
| { | ||||||
| hilbertCodes[i].first = i; | ||||||
|
|
||||||
| if (envelopes[i].IsInit()) | ||||||
| { | ||||||
| double dfX, dfY; | ||||||
| envelopes[i].Center(dfX, dfY); | ||||||
| hilbertCodes[i].second = CPLHilbertCode(oLayerExtent, dfX, dfY); | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| std::sort(hilbertCodes.begin(), hilbertCodes.end(), | ||||||
| [](const auto &a, const auto &b) | ||||||
| { return a.second < b.second; }); | ||||||
|
|
||||||
| std::vector<size_t> sortedIndices; | ||||||
| for (const auto &sItem : hilbertCodes) | ||||||
| { | ||||||
| sortedIndices.push_back(sItem.first); | ||||||
| } | ||||||
|
|
||||||
| return CreateDstFeatures(features, sortedIndices, dstLayer); | ||||||
| } | ||||||
|
|
||||||
| private: | ||||||
| CPL_DISALLOW_COPY_ASSIGN(GDALVectorHilbertSortDataset) | ||||||
| }; | ||||||
|
|
||||||
| #ifdef HAVE_GEOS | ||||||
| class GDALVectorSTRTreeSortDataset | ||||||
| : public GDALVectorNonStreamingAlgorithmDataset | ||||||
| { | ||||||
| public: | ||||||
| using GDALVectorNonStreamingAlgorithmDataset:: | ||||||
| GDALVectorNonStreamingAlgorithmDataset; | ||||||
|
|
||||||
| ~GDALVectorSTRTreeSortDataset() override | ||||||
| { | ||||||
| if (m_geosContext) | ||||||
| { | ||||||
| finishGEOS_r(m_geosContext); | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| bool Process(OGRLayer &srcLayer, OGRLayer &dstLayer) override | ||||||
| { | ||||||
| std::vector<std::unique_ptr<OGRFeature>> features; | ||||||
| std::vector<size_t> sortedIndices; | ||||||
|
|
||||||
| for (auto &feature : srcLayer) | ||||||
| { | ||||||
| features.emplace_back(feature.release()); | ||||||
| } | ||||||
| // TODO: variant of this fn returning unique_ptr | ||||||
| m_geosContext = OGRGeometry::createGEOSContext(); | ||||||
|
|
||||||
| auto TreeDeleter = [this](GEOSSTRtree *tree) | ||||||
| { GEOSSTRtree_destroy_r(m_geosContext, tree); }; | ||||||
|
|
||||||
| std::unique_ptr<GEOSSTRtree, decltype(TreeDeleter)> poTree( | ||||||
| GEOSSTRtree_create_r(m_geosContext, 10), TreeDeleter); | ||||||
|
|
||||||
| OGREnvelope oGeomExtent; | ||||||
| std::vector<size_t> nullIndices; | ||||||
| for (size_t i = 0; i < features.size(); i++) | ||||||
| { | ||||||
| const OGRFeature *poFeature = features[i].get(); | ||||||
| const OGRGeometry *poGeom = poFeature->GetGeometryRef(); | ||||||
| if (poGeom == nullptr) | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| { | ||||||
| nullIndices.push_back(i); | ||||||
| continue; | ||||||
| } | ||||||
|
|
||||||
| poGeom->getEnvelope(&oGeomExtent); | ||||||
| GEOSGeometry *poEnv = CreateGEOSEnvelope(oGeomExtent); | ||||||
| if (poEnv == nullptr) | ||||||
| { | ||||||
| return false; | ||||||
| } | ||||||
|
|
||||||
| GEOSSTRtree_insert_r(m_geosContext, poTree.get(), poEnv, | ||||||
| reinterpret_cast<void *>(i)); | ||||||
| GEOSGeom_destroy_r(m_geosContext, poEnv); | ||||||
| } | ||||||
|
|
||||||
| GEOSSTRtree_build_r(m_geosContext, poTree.get()); | ||||||
|
|
||||||
| GEOSSTRtree_iterate_r( | ||||||
| m_geosContext, poTree.get(), | ||||||
| [](void *item, void *userData) | ||||||
| { | ||||||
| static_cast<std::vector<size_t> *>(userData)->push_back( | ||||||
| reinterpret_cast<size_t>(item)); | ||||||
| }, | ||||||
| &sortedIndices); | ||||||
|
|
||||||
| for (size_t nullInd : nullIndices) | ||||||
| { | ||||||
| sortedIndices.push_back(nullInd); | ||||||
| } | ||||||
|
|
||||||
| return CreateDstFeatures(features, sortedIndices, dstLayer); | ||||||
| } | ||||||
|
|
||||||
| private: | ||||||
| CPL_DISALLOW_COPY_ASSIGN(GDALVectorSTRTreeSortDataset) | ||||||
|
|
||||||
| // FIXME: Duplicated from alg/zonal.cpp. | ||||||
| // Put into OGRGeometryFactory? | ||||||
| GEOSGeometry *CreateGEOSEnvelope(const OGREnvelope &oEnv) const | ||||||
| { | ||||||
| GEOSCoordSequence *seq = GEOSCoordSeq_create_r(m_geosContext, 2, 2); | ||||||
| if (seq == nullptr) | ||||||
| { | ||||||
| return nullptr; | ||||||
| } | ||||||
| GEOSCoordSeq_setXY_r(m_geosContext, seq, 0, oEnv.MinX, oEnv.MinY); | ||||||
| GEOSCoordSeq_setXY_r(m_geosContext, seq, 1, oEnv.MaxX, oEnv.MaxY); | ||||||
| return GEOSGeom_createLineString_r(m_geosContext, seq); | ||||||
| } | ||||||
|
|
||||||
| GEOSContextHandle_t m_geosContext{nullptr}; | ||||||
| }; | ||||||
| #endif | ||||||
|
|
||||||
| } // namespace | ||||||
|
|
||||||
| bool GDALVectorSortAlgorithm::RunStep(GDALPipelineStepRunContext &) | ||||||
| { | ||||||
| auto poSrcDS = m_inputDataset[0].GetDatasetRef(); | ||||||
| std::unique_ptr<GDALVectorNonStreamingAlgorithmDataset> poDstDS; | ||||||
|
|
||||||
| if (m_sortMethod == "hilbert") | ||||||
| { | ||||||
| poDstDS = std::make_unique<GDALVectorHilbertSortDataset>(); | ||||||
| } | ||||||
| else if (m_sortMethod == "strtree") | ||||||
| { | ||||||
| poDstDS = std::make_unique<GDALVectorSTRTreeSortDataset>(); | ||||||
| } | ||||||
| else | ||||||
| { | ||||||
| // error | ||||||
| } | ||||||
|
|
||||||
| for (auto &&poSrcLayer : poSrcDS->GetLayers()) | ||||||
| { | ||||||
| if (m_inputLayerNames.empty() || | ||||||
| std::find(m_inputLayerNames.begin(), m_inputLayerNames.end(), | ||||||
| poSrcLayer->GetDescription()) != m_inputLayerNames.end()) | ||||||
| { | ||||||
| const auto poSrcLayerDefn = poSrcLayer->GetLayerDefn(); | ||||||
| if (poSrcLayerDefn->GetGeomFieldCount() == 0) | ||||||
| { | ||||||
| if (m_inputLayerNames.empty()) | ||||||
| continue; | ||||||
| ReportError(CE_Failure, CPLE_AppDefined, | ||||||
| "Specified layer '%s' has no geometry field", | ||||||
| poSrcLayer->GetDescription()); | ||||||
| return false; | ||||||
| } | ||||||
|
|
||||||
| // FIXME geom-field | ||||||
|
|
||||||
| if (!poDstDS->AddProcessedLayer(*poSrcLayer, | ||||||
| *poSrcLayer->GetLayerDefn())) | ||||||
| { | ||||||
| return false; | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| m_outputDataset.Set(std::move(poDstDS)); | ||||||
|
|
||||||
| return true; | ||||||
| } | ||||||
|
|
||||||
| GDALVectorSortAlgorithmStandalone::~GDALVectorSortAlgorithmStandalone() = | ||||||
| default; | ||||||
|
|
||||||
| //! @endcond | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
call SetFID(OGRNullFID) should likely allow you to use SetFDefnUnsafe