Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,20 @@
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
import java.util.Spliterator;
import java.util.UUID;
import java.util.concurrent.Callable;
Expand All @@ -85,6 +88,7 @@
import org.hypertrace.core.documentstore.commons.DocStoreConstants;
import org.hypertrace.core.documentstore.expression.impl.AggregateExpression;
import org.hypertrace.core.documentstore.expression.impl.AliasedIdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.ArrayRelationalFilterExpression;
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
Expand Down Expand Up @@ -4581,6 +4585,197 @@ void testJsonbNumericComparisonOperators(String dataStoreName) {
}
}

@Nested
class FlatCollectionArrayBehaviourTest {

/**
 * Test EXISTS filter on top-level arrays. It should only return rows whose array is non-empty
 * (has at least one element); rows whose array is NULL or empty must not be returned.
 *
 * @param dataStoreName key into {@code datastoreMap} selecting the datastore under test
 * @throws JsonProcessingException if a returned document cannot be parsed as JSON
 */
@ParameterizedTest
@ArgumentsSource(PostgresProvider.class)
void testExistsFilterOnArray(String dataStoreName) throws JsonProcessingException {
  Datastore datastore = datastoreMap.get(dataStoreName);
  Collection flatCollection =
      datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);

  // EXISTS on the "tags" array column, addressed through ArrayIdentifierExpression.
  // The RHS is the string constant "null" (matching entity-service pattern).
  Query query =
      Query.builder()
          .addSelection(IdentifierExpression.of("item"))
          .addSelection(IdentifierExpression.of("tags"))
          .setFilter(
              RelationalExpression.of(
                  ArrayIdentifierExpression.of("tags"), EXISTS, ConstantExpression.of("null")))
          .build();

  Iterator<Document> results = flatCollection.find(query);

  // One mapper for the whole result set instead of a new one per document.
  final ObjectMapper mapper = new ObjectMapper();
  int count = 0;
  while (results.hasNext()) {
    Document doc = results.next();
    JsonNode json = mapper.readTree(doc.toJson());
    count++;
    // Every returned document must carry a non-empty array. The null guard turns a missing
    // "tags" field into a readable assertion failure instead of a NullPointerException.
    JsonNode tags = json.get("tags");
    assertTrue(
        tags != null && tags.isArray() && !tags.isEmpty(),
        "tags should be non-empty array, but was: " + tags);
  }

  // NOTE(review): the earlier comments and failure message spoke of 10 matching rows while the
  // assertion expects 8. The text now follows the asserted value; confirm it against the
  // seeded flat-collection data.
  assertEquals(8, count, "Should return exactly 8 docs that have non-empty tags");
}

/**
 * Test NOT_EXISTS filter on top-level arrays. This validates that NOT_EXISTS on array fields
 * returns both NULL and empty arrays, excluding only non-empty arrays.
 *
 * @param dataStoreName key into {@code datastoreMap} selecting the datastore under test
 * @throws JsonProcessingException if a returned document cannot be parsed as JSON
 */
@ParameterizedTest
@ArgumentsSource(PostgresProvider.class)
void testNotExistsFilterOnArrays(String dataStoreName) throws JsonProcessingException {
  Datastore datastore = datastoreMap.get(dataStoreName);
  Collection flatCollection =
      datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);

  // NOT_EXISTS on the "tags" array column, addressed through ArrayIdentifierExpression.
  // The RHS is the string constant "null" (matching entity-service pattern).
  Query query =
      Query.builder()
          .addSelection(IdentifierExpression.of("item"))
          .addSelection(IdentifierExpression.of("tags"))
          .setFilter(
              RelationalExpression.of(
                  ArrayIdentifierExpression.of("tags"),
                  NOT_EXISTS,
                  ConstantExpression.of("null")))
          .build();

  Iterator<Document> results = flatCollection.find(query);

  // One mapper for the whole result set instead of a new one per document.
  final ObjectMapper mapper = new ObjectMapper();
  int count = 0;
  while (results.hasNext()) {
    Document doc = results.next();
    JsonNode json = mapper.readTree(doc.toJson());
    count++;
    // Every returned document must have a missing/NULL or empty "tags" value.
    JsonNode tags = json.get("tags");
    assertTrue(
        tags == null || !tags.isArray() || tags.isEmpty(),
        "tags should be NULL or empty array, but was: " + tags);
  }

  // NOTE(review): the earlier comments and failure message spoke of 4 matching rows while the
  // assertion expects 2. The text now follows the asserted value; confirm it against the
  // seeded flat-collection data.
  assertEquals(2, count, "Should return exactly 2 documents with NULL or empty tags");
}

/**
 * Test EXISTS filter on JSONB arrays. Should only return rows whose nested array is non-empty
 * (has at least one element).
 *
 * @param dataStoreName key into {@code datastoreMap} selecting the datastore under test
 * @throws JsonProcessingException if a returned document cannot be parsed as JSON
 */
@ParameterizedTest
@ArgumentsSource(PostgresProvider.class)
void testExistsFilterOnJsonArrays(String dataStoreName) throws JsonProcessingException {
  Datastore datastore = datastoreMap.get(dataStoreName);
  Collection flatCollection =
      datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);

  // EXISTS on props.colors, declared as a STRING_ARRAY so the filter is treated as a JSONB
  // array rather than a scalar JSON field.
  Query query =
      Query.builder()
          .addSelection(IdentifierExpression.of("item"))
          .addSelection(JsonIdentifierExpression.of("props", "colors"))
          .setFilter(
              RelationalExpression.of(
                  JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "colors"),
                  EXISTS,
                  ConstantExpression.of("null")))
          .build();

  Iterator<Document> results = flatCollection.find(query);

  // One mapper for the whole result set instead of a new one per document.
  final ObjectMapper mapper = new ObjectMapper();
  int count = 0;
  while (results.hasNext()) {
    Document doc = results.next();
    JsonNode json = mapper.readTree(doc.toJson());
    count++;

    // Null guards make a missing "props"/"colors" field fail with the assertion message
    // instead of a NullPointerException.
    JsonNode props = json.get("props");
    assertTrue(props != null && props.isObject(), "props should be a JSON object");

    JsonNode colors = props.get("colors");
    assertTrue(
        colors != null && colors.isArray() && !colors.isEmpty(),
        "colors should be non-empty array, but was: " + colors);
  }

  // NOTE(review): earlier comments disagreed on which rows hold non-empty colors
  // (1, 3, 5 vs 1, 2, 3); only the count of 3 is asserted. Confirm the rows against the
  // seeded flat-collection data.
  assertEquals(3, count, "Should return exactly 3 documents with non-empty colors");
}

/**
 * Checks NOT_EXISTS against an array nested inside a JSONB column. Every returned row must have
 * no {@code props} object, no {@code colors} entry, a non-array {@code colors}, or an empty
 * {@code colors} array.
 *
 * @param dataStoreName key into {@code datastoreMap} selecting the datastore under test
 * @throws JsonProcessingException if a returned document cannot be parsed as JSON
 */
@ParameterizedTest
@ArgumentsSource(PostgresProvider.class)
void testNotExistsFilterOnJsonArrays(String dataStoreName) throws JsonProcessingException {
  final Collection flatCollection =
      datastoreMap
          .get(dataStoreName)
          .getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);

  // NOT_EXISTS on props.colors, typed as a STRING_ARRAY JSONB field.
  final RelationalExpression colorsAbsent =
      RelationalExpression.of(
          JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "colors"),
          NOT_EXISTS,
          ConstantExpression.of("null"));

  final Query query =
      Query.builder()
          .addSelection(IdentifierExpression.of("item"))
          .addSelection(JsonIdentifierExpression.of("props", "colors"))
          .setFilter(colorsAbsent)
          .build();

  final Iterator<Document> rows = flatCollection.find(query);

  final ObjectMapper mapper = new ObjectMapper();
  final Set<String> seenItems = new HashSet<>();
  int matched = 0;
  while (rows.hasNext()) {
    final JsonNode row = mapper.readTree(rows.next().toJson());
    matched++;

    final String item = row.get("item").asText();
    seenItems.add(item);

    final JsonNode props = row.get("props");
    if (props == null || !props.isObject()) {
      // No props object at all: props->colors is necessarily absent, nothing more to check.
      continue;
    }

    final JsonNode colors = props.get("colors");
    final boolean absentOrEmpty = colors == null || !colors.isArray() || colors.isEmpty();
    assertTrue(
        absentOrEmpty,
        "colors should be NULL or empty array for item: " + item + ", but was: " + colors);
  }

  // The result must be non-empty and must contain the row with an empty colors array.
  assertTrue(matched > 0, "Should return at least some documents");
  assertTrue(
      seenItems.contains("Comb"), "Should include Comb (has empty colors array in props)");
}
}

@Nested
class BulkUpdateTest {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,49 @@

import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.PostgresFieldTypeDetector.FieldCategory;

/**
 * Builds the SQL predicate for EXISTS / NOT_EXISTS relational filters.
 *
 * <p>The predicate depends on the {@link FieldCategory} detected for the left-hand side, so that
 * "exists" on an array means "present and non-empty" rather than merely "not NULL".
 */
class PostgresExistsRelationalFilterParser implements PostgresRelationalFilterParser {

  /**
   * Renders the filter as a SQL boolean expression.
   *
   * <p>When the right-hand side is anything other than the constant {@code false}:
   *
   * <ul>
   *   <li>scalar fields: {@code <lhs> IS NOT NULL}
   *   <li>arrays: {@code <lhs> IS NOT NULL AND cardinality(<lhs>) > 0}
   *   <li>JSONB arrays: {@code <lhs> IS NOT NULL AND jsonb_typeof(<lhs>) = 'array' AND
   *       jsonb_array_length(<lhs>) > 0}
   * </ul>
   *
   * <p>When the right-hand side is the constant {@code false}:
   *
   * <ul>
   *   <li>scalar fields: {@code <lhs> IS NULL}
   *   <li>arrays: {@code <lhs> IS NULL OR cardinality(<lhs>) = 0}
   *   <li>JSONB arrays: {@code <lhs> IS NULL OR (jsonb_typeof(<lhs>) = 'array' AND
   *       jsonb_array_length(<lhs>) = 0)}
   * </ul>
   *
   * @param expression the relational expression whose LHS is the field being tested
   * @param context supplies the parser used to render the LHS
   * @return the SQL predicate; array variants are wrapped in parentheses
   */
  @Override
  public String parse(
      final RelationalExpression expression, final PostgresRelationalFilterContext context) {
    final String parsedLhs = expression.getLhs().accept(context.lhsParser());
    // Only an explicit constant false flips the check; every other RHS means "must exist".
    final boolean parsedRhs = !ConstantExpression.of(false).equals(expression.getRhs());

    // The unconditional IS [NOT] NULL return that used to sit here made the array-aware
    // branches below unreachable; the scalar fallback now lives in the switch.
    final FieldCategory category = expression.getLhs().accept(new PostgresFieldTypeDetector());

    switch (category) {
      case ARRAY:
        // First-class PostgreSQL array columns (text[], int[], etc.)
        return parsedRhs
            ? String.format("(%s IS NOT NULL AND cardinality(%s) > 0)", parsedLhs, parsedLhs)
            : String.format("(%s IS NULL OR cardinality(%s) = 0)", parsedLhs, parsedLhs);

      case JSONB_ARRAY:
        // Arrays inside JSONB columns.
        // NOTE(review): this relies on the jsonb_typeof guard being evaluated before
        // jsonb_array_length; confirm the planner cannot reorder the conjuncts for non-array
        // values.
        return parsedRhs
            ? String.format(
                "(%s IS NOT NULL AND jsonb_typeof(%s) = 'array' AND jsonb_array_length(%s) > 0)",
                parsedLhs, parsedLhs, parsedLhs)
            : String.format(
                "(%s IS NULL OR (jsonb_typeof(%s) = 'array' AND jsonb_array_length(%s) = 0))",
                parsedLhs, parsedLhs, parsedLhs);

      case SCALAR:
      default:
        // Regular scalar fields
        return parsedRhs
            ? String.format("%s IS NOT NULL", parsedLhs)
            : String.format("%s IS NULL", parsedLhs);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter;

import org.hypertrace.core.documentstore.expression.impl.AggregateExpression;
import org.hypertrace.core.documentstore.expression.impl.AliasedIdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression.DocumentConstantExpression;
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.JsonFieldType;
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
import org.hypertrace.core.documentstore.parser.SelectTypeExpressionVisitor;

/**
 * Expression visitor that classifies a field expression so that array-aware SQL can be generated
 * for it.
 *
 * <p>Each visited expression maps to one {@link FieldCategory}:
 *
 * <ul>
 *   <li><b>SCALAR:</b> every expression that is not recognised as an array
 *   <li><b>ARRAY:</b> an {@link ArrayIdentifierExpression}
 *   <li><b>JSONB_ARRAY:</b> a {@link JsonIdentifierExpression} whose declared field type is one
 *       of the {@code *_ARRAY} values of {@link JsonFieldType}
 * </ul>
 *
 * <p>A {@link JsonIdentifierExpression} without a declared field type is classified as SCALAR.
 */
class PostgresFieldTypeDetector implements SelectTypeExpressionVisitor {

  /** Field category for determining appropriate SQL generation strategy */
  enum FieldCategory {
    /** Regular fields and JSON primitives. */
    SCALAR,
    /** Native PostgreSQL arrays (text[], int[], etc.). */
    ARRAY,
    /** Arrays inside JSONB columns. */
    JSONB_ARRAY
  }

  // ---- array-capable identifiers ----

  @Override
  public FieldCategory visit(ArrayIdentifierExpression expression) {
    return FieldCategory.ARRAY;
  }

  @Override
  public FieldCategory visit(JsonIdentifierExpression expression) {
    // An absent field type falls through to SCALAR, same as a non-array field type.
    return expression
        .getFieldType()
        .map(PostgresFieldTypeDetector::categoryOf)
        .orElse(FieldCategory.SCALAR);
  }

  // ---- everything else is scalar ----

  @Override
  public FieldCategory visit(IdentifierExpression expression) {
    return FieldCategory.SCALAR;
  }

  @Override
  public FieldCategory visit(AggregateExpression expression) {
    return FieldCategory.SCALAR;
  }

  @Override
  public FieldCategory visit(ConstantExpression expression) {
    return FieldCategory.SCALAR;
  }

  @Override
  public FieldCategory visit(DocumentConstantExpression expression) {
    return FieldCategory.SCALAR;
  }

  @Override
  public FieldCategory visit(FunctionExpression expression) {
    return FieldCategory.SCALAR;
  }

  @Override
  public FieldCategory visit(AliasedIdentifierExpression expression) {
    return FieldCategory.SCALAR;
  }

  /** Maps a declared JSON field type to JSONB_ARRAY for the array types, SCALAR otherwise. */
  private static FieldCategory categoryOf(final JsonFieldType type) {
    final boolean arrayType =
        type == JsonFieldType.STRING_ARRAY
            || type == JsonFieldType.NUMBER_ARRAY
            || type == JsonFieldType.BOOLEAN_ARRAY
            || type == JsonFieldType.OBJECT_ARRAY;
    return arrayType ? FieldCategory.JSONB_ARRAY : FieldCategory.SCALAR;
  }
}
Loading
Loading