maplibre · FloHerrnb · Nov 14, 2025 · Nov 14, 2025 · Nov 14, 2025 · Nov 14, 2025
diff --git a/cpp/src/mlt/decode/string.hpp b/cpp/src/mlt/decode/string.hpp
@@ -64,7 +64,9 @@ class StringDecoder {
                         throw std::runtime_error("Data stream missing logical type");
                     }
                     dictType = streamMetadata->getLogicalStreamType()->getDictionaryType();
+                    // TODO(review): can only two DictionaryType values reach this point?
                     auto& target = (dictType == DictionaryType::SINGLE) ? dictionaryStream : symbolStream;
+                    // TODO(review): is DictionaryType::FSST not used here?
                     decodeRaw(tileData, target, streamMetadata->getByteLength(), /*consume=*/true);
                     break;
                 }
@@ -209,53 +211,6 @@ class StringDecoder {
                           decompressedLength);
     }
 
-    static std::vector<std::uint8_t> decodeFSST(const std::uint8_t* symbols,
-                                                const std::size_t symbolCount,
-                                                const std::uint32_t* symbolLengths,
-                                                const std::size_t symbolLengthCount,
-                                                const std::uint8_t* compressedData,
-                                                const std::size_t compressedDataCount,
-                                                const std::size_t decompressedLength) {
-        std::vector<std::uint8_t> output;
-
-        if (decompressedLength > 0) {
-            output.resize(decompressedLength);
-        }
-        std::vector<std::uint32_t> symbolOffsets(symbolLengthCount);
-        for (size_t i = 1; i < symbolLengthCount; i++) {
-            symbolOffsets[i] = symbolOffsets[i - 1] + symbolLengths[i - 1];
-        }
-
-        std::size_t idx = 0;
-        for (size_t i = 0; i < compressedDataCount; i++) {
-            const std::uint8_t symbolIndex = compressedData[i];
-
-            // 255 is our escape byte -> take the next symbol as it is
-            if (symbolIndex == 255) {
-                if (idx == output.size()) {
-                    output.resize(output.size() * 2);
-                }
-                output[idx++] = compressedData[++i];
-            } else if (symbolIndex < symbolLengthCount) {
-                const auto len = symbolLengths[symbolIndex];
-                if (idx + len > output.size()) {
-                    output.resize((output.size() + len) * 2);
-                }
-                const auto offset = symbolOffsets[symbolIndex];
-                if (offset >= symbolCount) {
-                    throw std::runtime_error("FSST decode: symbol index out of bounds");
-                }
-                std::memcpy(&output[idx], &symbols[offset], len);
-                idx += len;
-            } else {
-                throw std::runtime_error("FSST decode: invalid symbol index");
-            }
-        }
-
-        output.resize(idx);
-        return output;
-    }
-
 private:
     IntegerDecoder& intDecoder;
 
@@ -317,6 +272,58 @@ class StringDecoder {
             out.push_back(dictionary[offsets[offsetIndex++]]);
         }
     }
+
+    /// Decodes FSST-compressed bytes. `symbols` holds all symbol bytes back to back and `symbolLengths[k]`
+    /// is the length of symbol `k`. Each input byte is a symbol index, except the escape byte 255, which
+    /// copies the following input byte to the output unchanged.
+    /// NOTE: a fixed-width `sym[256]`/`len[256]` table as in the FSST paper would likely decode faster, but
+    /// is not possible here because the encoder stores the symbol table tightly packed.
+    /// @param symbolCount Number of bytes in `symbols`.
+    /// @param symbolLengthCount Number of entries in `symbolLengths`.
+    /// @param compressedDataCount Number of bytes in `compressedData`.
+    /// @param decompressedLength Expected output size; used only to pre-allocate the output.
+    /// @return The decoded bytes.
+    /// @throws std::runtime_error on an invalid symbol reference or an escape byte that ends the input.
+    static std::vector<std::uint8_t> decodeFSST(const std::uint8_t* symbols,
+                                                const std::size_t symbolCount,
+                                                const std::uint32_t* symbolLengths,
+                                                const std::size_t symbolLengthCount,
+                                                const std::uint8_t* compressedData,
+                                                const std::size_t compressedDataCount,
+                                                const std::size_t decompressedLength) {
+        constexpr std::uint8_t escapeByte = 255;
+
+        std::vector<std::uint8_t> output;
+        output.reserve(decompressedLength);
+
+        // Start offset of each symbol inside `symbols`: the running sum of the preceding lengths.
+        std::vector<std::size_t> symbolOffsets(symbolLengthCount);
+        for (std::size_t i = 1; i < symbolLengthCount; i++) {
+            symbolOffsets[i] = symbolOffsets[i - 1] + symbolLengths[i - 1];
+        }
+
+        for (std::size_t i = 0; i < compressedDataCount; i++) {
+            const std::uint8_t symbolIndex = compressedData[i];
+            if (symbolIndex == escapeByte) {
+                // The escaped literal may itself be 255; only a missing literal is an error.
+                if (++i == compressedDataCount) {
+                    throw std::runtime_error("FSST decode: escape byte at end of input");
+                }
+                output.push_back(compressedData[i]);
+            } else if (symbolIndex < symbolLengthCount) {
+                const std::size_t len = symbolLengths[symbolIndex];
+                const std::size_t offset = symbolOffsets[symbolIndex];
+                // Check the whole symbol range, not just its first byte, before reading from `symbols`.
+                if (offset >= symbolCount || len > symbolCount - offset) {
+                    throw std::runtime_error("FSST decode: symbol index out of bounds");
+                }
+                output.insert(output.end(), symbols + offset, symbols + offset + len);
+            } else {
+                throw std::runtime_error("FSST decode: invalid symbol index");
+            }
+        }
+        return output;
+    }
 };
 
 } // namespace mlt::decoder
diff --git a/cpp/test/test_fsst.cpp b/cpp/test/test_fsst.cpp
@@ -5,7 +5,7 @@
 #include <string>
 #include <vector>
 
-TEST(FSST, DecodeFromJava) {
+TEST(FSST, DecodeFromJava_decode1) {
     const std::string expected = "AAAAAAABBBAAACCdddddEEEEEEfffEEEEAAAAAddddCC";
     const std::vector<std::uint8_t> symbols = {65, 65, 69, 69, 100, 100, 65, 66, 67, 69, 100, 102};
     const std::vector<std::uint32_t> symbolLengths = {2, 2, 2, 1, 1, 1, 1, 1, 1};
@@ -17,14 +17,69 @@ TEST(FSST, DecodeFromJava) {
 
     EXPECT_EQ(decoded.size(), expected.size());
     EXPECT_EQ(0, memcmp(expected.c_str(), decoded.data(), expected.size()));
+}
+
+TEST(FSST, DecodeFromJava_decode2) {  // passes 0 as the last decodeFSST argument
+    const std::string expected = "AAAAAAABBBAAACCdddddEEEEEEfffEEEEAAAAAddddCC";
+    const std::vector<std::uint8_t> symbols = {65, 65, 69, 69, 100, 100, 65, 66, 67, 69, 100, 102};
+    const std::vector<std::uint32_t> symbolLengths = {2, 2, 2, 1, 1, 1, 1, 1, 1};
+    const std::vector<std::uint8_t> javaCompressed = {0, 0, 0, 3, 4, 4, 4, 0, 3, 5, 5, 2, 2, 7, 1,
+                                                      1, 1, 8, 8, 8, 1, 1, 0, 0, 3, 2, 2, 5, 5};
 
     // also make sure buffer growth works
     const auto decoded2 = mlt::decoder::StringDecoder::decodeFSST(symbols, symbolLengths, javaCompressed, 0);
     EXPECT_EQ(decoded2.size(), expected.size());
     EXPECT_EQ(0, memcmp(expected.c_str(), decoded2.data(), expected.size()));
+}
+
+TEST(FSST, DecodeFromJava_decode3) {  // passes half the real size; NOTE: memcmp checks only that half
+    const std::string expected = "AAAAAAABBBAAACCdddddEEEEEEfffEEEEAAAAAddddCC";
+    const std::vector<std::uint8_t> symbols = {65, 65, 69, 69, 100, 100, 65, 66, 67, 69, 100, 102};
+    const std::vector<std::uint32_t> symbolLengths = {2, 2, 2, 1, 1, 1, 1, 1, 1};
+    const std::vector<std::uint8_t> javaCompressed = {0, 0, 0, 3, 4, 4, 4, 0, 3, 5, 5, 2, 2, 7, 1,
+                                                      1, 1, 8, 8, 8, 1, 1, 0, 0, 3, 2, 2, 5, 5};
 
     const auto decoded3 = mlt::decoder::StringDecoder::decodeFSST(
         symbols, symbolLengths, javaCompressed, expected.size() / 2);
     EXPECT_EQ(decoded3.size(), expected.size());
     EXPECT_EQ(0, memcmp(expected.c_str(), decoded3.data(), expected.size() / 2));
 }
+
+TEST(FSST, DecodeFromJava_With_one_Escape_character) {
+    const std::string expected = "AAAAAAABBBAAACCdddddEEEEEEfffEEEEAAAAAddddCCk";
+    const std::vector<std::uint8_t> symbols = {65, 65, 69, 69, 100, 100, 65, 66, 67, 69, 100, 102};
+    const std::vector<std::uint32_t> symbolLengths = {2, 2, 2, 1, 1, 1, 1, 1, 1};
+    const std::vector<std::uint8_t> javaCompressed = {0, 0, 0, 3, 4, 4, 4, 0, 3, 5, 5, 2, 2, 7,   1,  1,
+                                                      1, 8, 8, 8, 1, 1, 0, 0, 3, 2, 2, 5, 5, 255, 107};
+    // 255 is the escape byte: the byte that follows it (107, 'k') is copied to the output as-is.
+    const auto decoded = mlt::decoder::StringDecoder::decodeFSST(
+        symbols, symbolLengths, javaCompressed, expected.size());
+    EXPECT_EQ(decoded.size(), expected.size());
+    EXPECT_EQ(0, memcmp(expected.c_str(), decoded.data(), expected.size()));
+}
+
+TEST(FSST, DecodeFromJava_With_multiple_Escape_characters) {
+    const std::string expected = "AAAAAAABBBAAACCdddddEEEEEEfffEEEEAAAAAddddCCkkk";
+    const std::vector<std::uint8_t> symbols = {65, 65, 69, 69, 100, 100, 65, 66, 67, 69, 100, 102};
+    const std::vector<std::uint32_t> symbolLengths = {2, 2, 2, 1, 1, 1, 1, 1, 1};
+    const std::vector<std::uint8_t> javaCompressed = {0, 0, 0, 3, 4, 4, 4, 0, 3, 5, 5, 2,   2,   7,   1,   1,   1,  8,
+                                                      8, 8, 1, 1, 0, 0, 3, 2, 2, 5, 5, 255, 107, 255, 107, 255, 107};
+    // Three consecutive escape sequences (255, 107), each emitting one literal 'k'.
+    const auto decoded = mlt::decoder::StringDecoder::decodeFSST(
+        symbols, symbolLengths, javaCompressed, expected.size());
+    EXPECT_EQ(decoded.size(), expected.size());
+    EXPECT_EQ(0, memcmp(expected.c_str(), decoded.data(), expected.size()));
+}
+
+TEST(FSST, DecodeFromJava_With_one_single_escaped_character) {
+    const std::string expected = "k";
+    const std::vector<std::uint8_t> symbols = {65, 65, 69, 69, 100, 100, 65, 66, 67, 69, 100, 102};
+    const std::vector<std::uint32_t> symbolLengths = {2, 2, 2, 1, 1, 1, 1, 1, 1};
+    // The whole input is one escape sequence: 255 followed by the literal byte 107 ('k').
+    const std::vector<std::uint8_t> javaCompressed = {255, 107};
+
+    const auto decoded = mlt::decoder::StringDecoder::decodeFSST(
+        symbols, symbolLengths, javaCompressed, expected.size());
+    EXPECT_EQ(decoded.size(), expected.size());
+    EXPECT_EQ(0, memcmp(expected.c_str(), decoded.data(), expected.size()));
+}
diff --git a/justfile b/justfile
@@ -158,13 +158,13 @@ mkdocs-build:
     cd mkdocs && docker run --rm -v ${PWD}:/docs squidfunk/mkdocs-material build --strict
 
 # Build Java encoder and generate .mlt files for all .pbf files in test/fixtures
 [working-directory: 'java']
 generate-expected-mlt:  (cargo-install 'fd' 'fd-find')
     ./gradlew cli
     fd . ../test/fixtures --no-ignore --extension pbf --extension mvt -x {{quote(just_executable())}} generate-one-expected-mlt
 
 # Generate a single .mlt file for a given .mvt or .pbf file, assuming JAR is built
-[working-directory: 'java']
+#[working-directory: 'java']
 [private]
 generate-one-expected-mlt file:
     java \