Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ target_sources(appslib PRIVATE
gdalalg_vector_reproject.cpp
gdalalg_vector_select.cpp
gdalalg_vector_set_field_type.cpp
gdalalg_vector_sort.cpp
gdalalg_vector_sql.cpp
gdalalg_vector_write.cpp
gdalalg_vsi.cpp
Expand Down
2 changes: 2 additions & 0 deletions apps/gdalalg_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "gdalalg_vector_set_geom_type.h"
#include "gdalalg_vector_simplify.h"
#include "gdalalg_vector_simplify_coverage.h"
#include "gdalalg_vector_sort.h"
#include "gdalalg_vector_sql.h"
#include "gdalalg_vector_make_valid.h"
#include "gdalalg_vector_swap_xy.h"
Expand Down Expand Up @@ -94,6 +95,7 @@ class GDALVectorAlgorithm final : public GDALAlgorithm
RegisterSubAlgorithm<GDALVectorSetGeomTypeAlgorithmStandalone>();
RegisterSubAlgorithm<GDALVectorSimplifyAlgorithmStandalone>();
RegisterSubAlgorithm<GDALVectorSimplifyCoverageAlgorithmStandalone>();
RegisterSubAlgorithm<GDALVectorSortAlgorithmStandalone>();
RegisterSubAlgorithm<GDALVectorSQLAlgorithmStandalone>();
RegisterSubAlgorithm<GDALVectorSwapXYAlgorithmStandalone>();
}
Expand Down
2 changes: 2 additions & 0 deletions apps/gdalalg_vector_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "gdalalg_vector_set_geom_type.h"
#include "gdalalg_vector_simplify.h"
#include "gdalalg_vector_simplify_coverage.h"
#include "gdalalg_vector_sort.h"
#include "gdalalg_vector_sql.h"
#include "gdalalg_vector_swap_xy.h"
#include "gdalalg_vector_write.h"
Expand Down Expand Up @@ -179,6 +180,7 @@ void GDALVectorPipelineAlgorithm::RegisterAlgorithms(
registry.Register<GDALVectorSetGeomTypeAlgorithm>();
registry.Register<GDALVectorSimplifyAlgorithm>();
registry.Register<GDALVectorSimplifyCoverageAlgorithm>();
registry.Register<GDALVectorSortAlgorithm>();
registry.Register<GDALVectorSQLAlgorithm>();
registry.Register<GDALVectorSwapXYAlgorithm>();

Expand Down
356 changes: 356 additions & 0 deletions apps/gdalalg_vector_sort.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,356 @@
/******************************************************************************
*
* Project: GDAL
* Purpose: "gdal vector sort" subcommand
* Author: Daniel Baston
*
******************************************************************************
* Copyright (c) 2025, ISciences LLC
*
* SPDX-License-Identifier: MIT
****************************************************************************/

#include "gdalalg_vector_sort.h"

#include "cpl_error.h"
#include "gdal_priv.h"
#include "gdalalg_vector_geom.h"
#include "ogr_geometry.h"

#include "ogr_geos.h"

#include <cinttypes>

#ifndef _
#define _(x) (x)
#endif

//! @cond Doxygen_Suppress

GDALVectorSortAlgorithm::GDALVectorSortAlgorithm(bool standaloneStep)
: GDALVectorPipelineStepAlgorithm(NAME, DESCRIPTION, HELP_URL,
standaloneStep)
{
AddArg("method", 0, _("Geometry sorting algorithm"), &m_sortMethod)
.SetChoices("hilbert", "strtree")
.SetDefault(m_sortMethod);
}

constexpr uint32_t CPL_HILBERT_MAX = (1 << 16) - 1;

std::uint32_t CPLHilbertCode(std::uint32_t x, std::uint32_t y)
{
// Based on public domain code at
// https://github.com/rawrunprotected/hilbert_curves

uint32_t a = x ^ y;
uint32_t b = 0xFFFF ^ a;
uint32_t c = 0xFFFF ^ (x | y);
uint32_t d = x & (y ^ 0xFFFF);

uint32_t A = a | (b >> 1);
uint32_t B = (a >> 1) ^ a;
uint32_t C = ((c >> 1) ^ (b & (d >> 1))) ^ c;
uint32_t D = ((a & (c >> 1)) ^ (d >> 1)) ^ d;

a = A;
b = B;
c = C;
d = D;
A = ((a & (a >> 2)) ^ (b & (b >> 2)));
B = ((a & (b >> 2)) ^ (b & ((a ^ b) >> 2)));
C ^= ((a & (c >> 2)) ^ (b & (d >> 2)));
D ^= ((b & (c >> 2)) ^ ((a ^ b) & (d >> 2)));

a = A;
b = B;
c = C;
d = D;
A = ((a & (a >> 4)) ^ (b & (b >> 4)));
B = ((a & (b >> 4)) ^ (b & ((a ^ b) >> 4)));
C ^= ((a & (c >> 4)) ^ (b & (d >> 4)));
D ^= ((b & (c >> 4)) ^ ((a ^ b) & (d >> 4)));

a = A;
b = B;
c = C;
d = D;
C ^= ((a & (c >> 8)) ^ (b & (d >> 8)));
D ^= ((b & (c >> 8)) ^ ((a ^ b) & (d >> 8)));

a = C ^ (C >> 1);
b = D ^ (D >> 1);

uint32_t i0 = x ^ y;
uint32_t i1 = b | (0xFFFF ^ (i0 | a));

i0 = (i0 | (i0 << 8)) & 0x00FF00FF;
i0 = (i0 | (i0 << 4)) & 0x0F0F0F0F;
i0 = (i0 | (i0 << 2)) & 0x33333333;
i0 = (i0 | (i0 << 1)) & 0x55555555;

i1 = (i1 | (i1 << 8)) & 0x00FF00FF;
i1 = (i1 | (i1 << 4)) & 0x0F0F0F0F;
i1 = (i1 | (i1 << 2)) & 0x33333333;
i1 = (i1 | (i1 << 1)) & 0x55555555;

uint32_t value = ((i1 << 1) | i0);

return value;
}

std::uint32_t CPLHilbertCode(const OGREnvelope &oDomain, double dfX, double dfY)
{
uint32_t x = 0;
uint32_t y = 0;
if (oDomain.Width() != 0.0)
x = static_cast<uint32_t>(
floor(CPL_HILBERT_MAX * (dfX - oDomain.MinX) / oDomain.Width()));
if (oDomain.Height() != 0.0)
y = static_cast<uint32_t>(
floor(CPL_HILBERT_MAX * (dfY - oDomain.MinY) / oDomain.Height()));
return CPLHilbertCode(x, y);
}

namespace
{

bool CreateDstFeatures(
const std::vector<std::unique_ptr<OGRFeature>> &srcFeatures,
const std::vector<size_t> &sortedIndices, OGRLayer &dstLayer)
{
for (size_t iSrcFeature : sortedIndices)
{
OGRFeature *poSrcFeature = srcFeatures[iSrcFeature].get();

// Tried just using poSrcFeature->SetFDefnUnsafe and plugging
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

call SetFID(OGRNullFID) should likely allow you to use SetFDefnUnsafe

// directly into CreateFeature, but this resulted in the order
// of the input features being preserved.

OGRFeature f(dstLayer.GetLayerDefn());
f.SetFrom(poSrcFeature);
if (dstLayer.CreateFeature(&f) != OGRERR_NONE)
{
return false;
}
}

return true;
}

class GDALVectorHilbertSortDataset
: public GDALVectorNonStreamingAlgorithmDataset
{
public:
using GDALVectorNonStreamingAlgorithmDataset::
GDALVectorNonStreamingAlgorithmDataset;

bool Process(OGRLayer &srcLayer, OGRLayer &dstLayer) override
{
std::vector<std::unique_ptr<OGRFeature>> features;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it would be great if we had a way of dealing with layers that don't fit in RAM (but where the hilbertCodes vector would still fit). That would likely involve using a temporary storage like I did in the Parquet driver where when with the SORT_BY_BBOX layer creation option is used, a temporary GeoPackage file is created., for source layers that don't have efficient GetFeature() implementation.
This remark is not to be implemented in the context of this PR.


for (auto &feature : srcLayer)
{
features.emplace_back(feature.release());
}
OGREnvelope oLayerExtent;

std::vector<OGREnvelope> envelopes(features.size());
for (size_t i = 0; i < features.size(); i++)
{
// TODO GeomFieldRef
const OGRFeature *poFeature = features[i].get();
const OGRGeometry *poGeom = poFeature->GetGeometryRef();

if (poGeom != nullptr)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (poGeom != nullptr)
if (poGeom != nullptr && !poGeom->IsEmpty())

{
poGeom->getEnvelope(&envelopes[i]);
oLayerExtent.Merge(envelopes[i]);
}
}

std::vector<std::pair<std::size_t, std::uint32_t>> hilbertCodes(
features.size());
for (std::size_t i = 0; i < features.size(); i++)
{
hilbertCodes[i].first = i;

if (envelopes[i].IsInit())
{
double dfX, dfY;
envelopes[i].Center(dfX, dfY);
hilbertCodes[i].second = CPLHilbertCode(oLayerExtent, dfX, dfY);
}
}

std::sort(hilbertCodes.begin(), hilbertCodes.end(),
[](const auto &a, const auto &b)
{ return a.second < b.second; });

std::vector<size_t> sortedIndices;
for (const auto &sItem : hilbertCodes)
{
sortedIndices.push_back(sItem.first);
}

return CreateDstFeatures(features, sortedIndices, dstLayer);
}

private:
CPL_DISALLOW_COPY_ASSIGN(GDALVectorHilbertSortDataset)
};

#ifdef HAVE_GEOS
class GDALVectorSTRTreeSortDataset
: public GDALVectorNonStreamingAlgorithmDataset
{
public:
using GDALVectorNonStreamingAlgorithmDataset::
GDALVectorNonStreamingAlgorithmDataset;

~GDALVectorSTRTreeSortDataset() override
{
if (m_geosContext)
{
finishGEOS_r(m_geosContext);
}
}

bool Process(OGRLayer &srcLayer, OGRLayer &dstLayer) override
{
std::vector<std::unique_ptr<OGRFeature>> features;
std::vector<size_t> sortedIndices;

for (auto &feature : srcLayer)
{
features.emplace_back(feature.release());
}
// TODO: variant of this fn returning unique_ptr
m_geosContext = OGRGeometry::createGEOSContext();

auto TreeDeleter = [this](GEOSSTRtree *tree)
{ GEOSSTRtree_destroy_r(m_geosContext, tree); };

std::unique_ptr<GEOSSTRtree, decltype(TreeDeleter)> poTree(
GEOSSTRtree_create_r(m_geosContext, 10), TreeDeleter);

OGREnvelope oGeomExtent;
std::vector<size_t> nullIndices;
for (size_t i = 0; i < features.size(); i++)
{
const OGRFeature *poFeature = features[i].get();
const OGRGeometry *poGeom = poFeature->GetGeometryRef();
if (poGeom == nullptr)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (poGeom == nullptr)
if (poGeom == nullptr || poGeom->IsEmpty())

{
nullIndices.push_back(i);
continue;
}

poGeom->getEnvelope(&oGeomExtent);
GEOSGeometry *poEnv = CreateGEOSEnvelope(oGeomExtent);
if (poEnv == nullptr)
{
return false;
}

GEOSSTRtree_insert_r(m_geosContext, poTree.get(), poEnv,
reinterpret_cast<void *>(i));
GEOSGeom_destroy_r(m_geosContext, poEnv);
}

GEOSSTRtree_build_r(m_geosContext, poTree.get());

GEOSSTRtree_iterate_r(
m_geosContext, poTree.get(),
[](void *item, void *userData)
{
static_cast<std::vector<size_t> *>(userData)->push_back(
reinterpret_cast<size_t>(item));
},
&sortedIndices);

for (size_t nullInd : nullIndices)
{
sortedIndices.push_back(nullInd);
}

return CreateDstFeatures(features, sortedIndices, dstLayer);
}

private:
CPL_DISALLOW_COPY_ASSIGN(GDALVectorSTRTreeSortDataset)

// FIXME: Duplicated from alg/zonal.cpp.
// Put into OGRGeometryFactory?
GEOSGeometry *CreateGEOSEnvelope(const OGREnvelope &oEnv) const
{
GEOSCoordSequence *seq = GEOSCoordSeq_create_r(m_geosContext, 2, 2);
if (seq == nullptr)
{
return nullptr;
}
GEOSCoordSeq_setXY_r(m_geosContext, seq, 0, oEnv.MinX, oEnv.MinY);
GEOSCoordSeq_setXY_r(m_geosContext, seq, 1, oEnv.MaxX, oEnv.MaxY);
return GEOSGeom_createLineString_r(m_geosContext, seq);
}

GEOSContextHandle_t m_geosContext{nullptr};
};
#endif

} // namespace

bool GDALVectorSortAlgorithm::RunStep(GDALPipelineStepRunContext &)
{
auto poSrcDS = m_inputDataset[0].GetDatasetRef();
std::unique_ptr<GDALVectorNonStreamingAlgorithmDataset> poDstDS;

if (m_sortMethod == "hilbert")
{
poDstDS = std::make_unique<GDALVectorHilbertSortDataset>();
}
else if (m_sortMethod == "strtree")
{
poDstDS = std::make_unique<GDALVectorSTRTreeSortDataset>();
}
else
{
// error
}

for (auto &&poSrcLayer : poSrcDS->GetLayers())
{
if (m_inputLayerNames.empty() ||
std::find(m_inputLayerNames.begin(), m_inputLayerNames.end(),
poSrcLayer->GetDescription()) != m_inputLayerNames.end())
{
const auto poSrcLayerDefn = poSrcLayer->GetLayerDefn();
if (poSrcLayerDefn->GetGeomFieldCount() == 0)
{
if (m_inputLayerNames.empty())
continue;
ReportError(CE_Failure, CPLE_AppDefined,
"Specified layer '%s' has no geometry field",
poSrcLayer->GetDescription());
return false;
}

// FIXME geom-field

if (!poDstDS->AddProcessedLayer(*poSrcLayer,
*poSrcLayer->GetLayerDefn()))
{
return false;
}
}
}

m_outputDataset.Set(std::move(poDstDS));

return true;
}

GDALVectorSortAlgorithmStandalone::~GDALVectorSortAlgorithmStandalone() =
default;

//! @endcond
Loading
Loading