Skip to content

Commit 6526d3c

Browse files
authored
Handle Array Exists/Not Exists in Flat Collections (#250)
1 parent c7bb6ad commit 6526d3c

File tree

8 files changed

+1142
-6
lines changed

8 files changed

+1142
-6
lines changed

document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java

Lines changed: 195 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,17 +62,20 @@
6262
import static org.junit.jupiter.api.Assertions.assertThrows;
6363
import static org.junit.jupiter.api.Assertions.assertTrue;
6464

65+
import com.fasterxml.jackson.core.JsonProcessingException;
6566
import com.fasterxml.jackson.databind.JsonNode;
6667
import com.fasterxml.jackson.databind.ObjectMapper;
6768
import com.typesafe.config.Config;
6869
import com.typesafe.config.ConfigFactory;
6970
import java.io.IOException;
7071
import java.util.HashMap;
72+
import java.util.HashSet;
7173
import java.util.Iterator;
7274
import java.util.List;
7375
import java.util.Map;
7476
import java.util.Optional;
7577
import java.util.Random;
78+
import java.util.Set;
7679
import java.util.Spliterator;
7780
import java.util.UUID;
7881
import java.util.concurrent.Callable;
@@ -85,6 +88,7 @@
8588
import org.hypertrace.core.documentstore.commons.DocStoreConstants;
8689
import org.hypertrace.core.documentstore.expression.impl.AggregateExpression;
8790
import org.hypertrace.core.documentstore.expression.impl.AliasedIdentifierExpression;
91+
import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression;
8892
import org.hypertrace.core.documentstore.expression.impl.ArrayRelationalFilterExpression;
8993
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
9094
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
@@ -4581,6 +4585,197 @@ void testJsonbNumericComparisonOperators(String dataStoreName) {
45814585
}
45824586
}
45834587

4588+
@Nested
4589+
class FlatCollectionArrayBehaviourTest {
4590+
4591+
/**
4592+
* Test EXISTS filter on top-level arrays. It should only return documents whose array is
4593+
* non-empty (has at least one element).
4594+
*/
4595+
@ParameterizedTest
4596+
@ArgumentsSource(PostgresProvider.class)
4597+
void testExistsFilterOnArray(String dataStoreName) throws JsonProcessingException {
4598+
Datastore datastore = datastoreMap.get(dataStoreName);
4599+
Collection flatCollection =
4600+
datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);
4601+
4602+
// Query using EXISTS on the "tags" array column through ArrayIdentifierExpression
4603+
// The fixture rows are not visible from this test; the expected count is asserted at the end
4604+
// Using EXISTS with 'null' parameter (matching entity-service pattern)
4605+
Query query =
4606+
Query.builder()
4607+
.addSelection(IdentifierExpression.of("item"))
4608+
.addSelection(IdentifierExpression.of("tags"))
4609+
.setFilter(
4610+
RelationalExpression.of(
4611+
ArrayIdentifierExpression.of("tags"), EXISTS, ConstantExpression.of("null")))
4612+
.build();
4613+
4614+
Iterator<Document> results = flatCollection.find(query);
4615+
4616+
int count = 0;
4617+
while (results.hasNext()) {
4618+
Document doc = results.next();
4619+
JsonNode json = new ObjectMapper().readTree(doc.toJson());
4620+
count++;
4621+
// Verify that ALL returned documents have non-empty arrays (a missing field fails the assertion)
4622+
JsonNode tags = json.get("tags");
4623+
assertTrue(
4624+
tags != null && tags.isArray() && !tags.isEmpty(), "tags should be non-empty array, but was: " + tags);
4625+
}
4626+
4627+
// Should return only documents with non-empty arrays
4628+
// The asserted count (8) and the failure message are kept in agreement with each other
4629+
// NOTE(review): previous comments here enumerated 10 matching rows, contradicting the asserted
4630+
// value of 8 - confirm the real count against the flat-collection fixture data
4631+
assertEquals(8, count, "Should return a total of 8 docs that have non-empty tags");
4632+
}
4633+
4634+
/**
4635+
* Test NOT_EXISTS filter on top-level arrays. This validates that NOT_EXISTS on array fields
4636+
* returns both NULL and empty arrays, excluding only non-empty arrays.
4637+
*/
4638+
@ParameterizedTest
4639+
@ArgumentsSource(PostgresProvider.class)
4640+
void testNotExistsFilterOnArrays(String dataStoreName) throws JsonProcessingException {
4641+
Datastore datastore = datastoreMap.get(dataStoreName);
4642+
Collection flatCollection =
4643+
datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);
4644+
4645+
// Query using NOT_EXISTS on the "tags" array column through ArrayIdentifierExpression
4646+
// Using NOT_EXISTS with 'null' parameter (matching entity-service pattern)
4647+
Query query =
4648+
Query.builder()
4649+
.addSelection(IdentifierExpression.of("item"))
4650+
.addSelection(IdentifierExpression.of("tags"))
4651+
.setFilter(
4652+
RelationalExpression.of(
4653+
ArrayIdentifierExpression.of("tags"),
4654+
NOT_EXISTS,
4655+
ConstantExpression.of("null")))
4656+
.build();
4657+
4658+
Iterator<Document> results = flatCollection.find(query);
4659+
4660+
int count = 0;
4661+
while (results.hasNext()) {
4662+
Document doc = results.next();
4663+
JsonNode json = new ObjectMapper().readTree(doc.toJson());
4664+
count++;
4665+
// Verify that ALL returned documents have NULL or empty arrays
4666+
JsonNode tags = json.get("tags");
4667+
assertTrue(
4668+
tags == null || !tags.isArray() || tags.isEmpty(),
4669+
"tags should be NULL or empty array, but was: " + tags);
4670+
}
4671+
4672+
// Should return documents with NULL or empty arrays
4673+
// NOTE(review): previous comments here enumerated 4 such rows, contradicting the asserted
4674+
// value of 2 - confirm the real count against the flat-collection fixture data
4675+
assertEquals(2, count, "Should return 2 documents with NULL or empty tags");
4676+
}
4677+
4678+
/**
4679+
* Test EXISTS filter on JSONB arrays. Should only return non-empty arrays (with at least one
4680+
* element).
4681+
*/
4682+
@ParameterizedTest
4683+
@ArgumentsSource(PostgresProvider.class)
4684+
void testExistsFilterOnJsonArrays(String dataStoreName) throws JsonProcessingException {
4685+
Datastore datastore = datastoreMap.get(dataStoreName);
4686+
Collection flatCollection =
4687+
datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);
4688+
4689+
// Query using EXISTS on the JSONB array field props.colors (declared as STRING_ARRAY)
4690+
// NOTE(review): fixture rows are not visible here; which rows hold non-empty colors - confirm
4691+
Query query =
4692+
Query.builder()
4693+
.addSelection(IdentifierExpression.of("item"))
4694+
.addSelection(JsonIdentifierExpression.of("props", "colors"))
4695+
.setFilter(
4696+
RelationalExpression.of(
4697+
JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "colors"),
4698+
EXISTS,
4699+
ConstantExpression.of("null")))
4700+
.build();
4701+
4702+
Iterator<Document> results = flatCollection.find(query);
4703+
4704+
int count = 0;
4705+
while (results.hasNext()) {
4706+
Document doc = results.next();
4707+
JsonNode json = new ObjectMapper().readTree(doc.toJson());
4708+
count++;
4709+
4710+
// Verify that ALL returned documents have non-empty arrays in props.colors
4711+
JsonNode props = json.get("props");
4712+
assertTrue(props.isObject(), "props should be a JSON object");
4713+
4714+
JsonNode colors = props.get("colors");
4715+
assertTrue(
4716+
colors.isArray() && !colors.isEmpty(),
4717+
"colors should be non-empty array, but was: " + colors);
4718+
}
4719+
4720+
// Expect exactly the 3 documents whose props.colors array is non-empty
4721+
assertEquals(3, count, "Should return exactly 3 documents with non-empty colors");
4722+
}
4723+
4724+
/**
4725+
* Test NOT_EXISTS filter on JSONB arrays. This validates that NOT_EXISTS on array fields inside
4726+
* JSONB returns documents where the field is NULL, the parent object is NULL, or the array is
4727+
* empty.
4728+
*/
4729+
@ParameterizedTest
4730+
@ArgumentsSource(PostgresProvider.class)
4731+
void testNotExistsFilterOnJsonArrays(String dataStoreName) throws JsonProcessingException {
4732+
Datastore datastore = datastoreMap.get(dataStoreName);
4733+
Collection flatCollection =
4734+
datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);
4735+
4736+
// Query using NOT_EXISTS on a JSONB array field
4737+
// The field under test is props.colors (declared as STRING_ARRAY)
4738+
Query query =
4739+
Query.builder()
4740+
.addSelection(IdentifierExpression.of("item"))
4741+
.addSelection(JsonIdentifierExpression.of("props", "colors"))
4742+
.setFilter(
4743+
RelationalExpression.of(
4744+
JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "colors"),
4745+
NOT_EXISTS,
4746+
ConstantExpression.of("null")))
4747+
.build();
4748+
4749+
Iterator<Document> results = flatCollection.find(query);
4750+
4751+
int count = 0;
4752+
Set<String> returnedItems = new HashSet<>();
4753+
while (results.hasNext()) {
4754+
Document doc = results.next();
4755+
JsonNode json = new ObjectMapper().readTree(doc.toJson());
4756+
count++;
4757+
4758+
String item = json.get("item").asText();
4759+
returnedItems.add(item);
4760+
4761+
// Verify that returned documents have NULL parent, missing field, or empty arrays
4762+
JsonNode props = json.get("props");
4763+
if (props != null && props.isObject()) {
4764+
JsonNode colors = props.get("colors");
4765+
assertTrue(
4766+
colors == null || !colors.isArray() || colors.isEmpty(),
4767+
"colors should be NULL or empty array for item: " + item + ", but was: " + colors);
4768+
}
4769+
// NULL props is also valid (if props is null, then props->colors is null too)
4770+
}
4771+
4772+
// Only a lower bound on the count is asserted, plus the presence of one known item ("Comb")
4773+
assertTrue(count > 0, "Should return at least some documents");
4774+
assertTrue(
4775+
returnedItems.contains("Comb"), "Should include Comb (has empty colors array in props)");
4776+
}
4777+
}
4778+
45844779
@Nested
45854780
class BulkUpdateTest {
45864781

Original file line number | Diff line number | Diff line change
@@ -1,16 +1,75 @@
11
package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter;
22

33
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
4+
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
45
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
6+
import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.PostgresFieldTypeDetector.FieldCategory;
57

68
class PostgresExistsRelationalFilterParser implements PostgresRelationalFilterParser {
9+
710
@Override
811
public String parse(
912
final RelationalExpression expression, final PostgresRelationalFilterContext context) {
1013
final String parsedLhs = expression.getLhs().accept(context.lhsParser());
14+
// EXISTS unless the RHS is the constant false, in which case NOT EXISTS. Per field category:
15+
// Regular fields -> IS NOT NULL / IS NULL
16+
// Arrays -> cardinality(...) > 0 / COALESCE(cardinality(...), 0) = 0
17+
// JSONB arrays -> parent @> '{"<path>": []}' AND jsonb_array_length(...) > 0
18+
// / COALESCE(jsonb_array_length(...), 0) = 0
19+
// JSONB scalars -> parent ? '<path>' / NOT (parent ? '<path>')
20+
// NOTE(review): <path> is the dot-joined JSON path used as one top-level key and is not
21+
// escaped for quotes; confirm multi-segment or quoted paths cannot reach this parser.
1122
final boolean parsedRhs = !ConstantExpression.of(false).equals(expression.getRhs());
12-
return parsedRhs
13-
? String.format("%s IS NOT NULL", parsedLhs)
14-
: String.format("%s IS NULL", parsedLhs);
23+
24+
FieldCategory category = expression.getLhs().accept(new PostgresFieldTypeDetector());
25+
26+
switch (category) {
27+
case ARRAY:
28+
// First-class PostgreSQL array columns (text[], int[], etc.)
29+
return parsedRhs
30+
// No explicit IS NOT NULL check: cardinality(NULL) is NULL, so the comparison is not
31+
// true and the row is left out of the result set by WHERE
32+
? String.format("(cardinality(%s) > 0)", parsedLhs)
33+
: String.format("COALESCE(cardinality(%s), 0) = 0", parsedLhs);
34+
35+
case JSONB_ARRAY:
36+
{
37+
JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) expression.getLhs();
38+
String baseColumn = wrapWithDoubleQuotes(jsonExpr.getColumnName());
39+
String nestedPath = String.join(".", jsonExpr.getJsonPath());
40+
return parsedRhs
41+
// The path is a format argument rather than concatenated, so a '%' in it stays literal
42+
? String.format(
43+
"(%s @> '{\"%s\": []}' AND jsonb_array_length(%s) > 0)",
44+
baseColumn,
45+
nestedPath, parsedLhs)
46+
// jsonb_array_length(NULL) is NULL; COALESCE maps that to 0 so a missing array is
47+
// treated the same as an empty one
48+
: String.format("COALESCE(jsonb_array_length(%s), 0) = 0", parsedLhs);
49+
}
50+
51+
case JSONB_SCALAR:
52+
{
53+
// JSONB scalar fields - key-existence check; NOTE(review): confirm '?' is not a JDBC placeholder here
54+
JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) expression.getLhs();
55+
String baseColumn = wrapWithDoubleQuotes(jsonExpr.getColumnName());
56+
String nestedPath = String.join(".", jsonExpr.getJsonPath());
57+
58+
return parsedRhs
59+
? String.format("%s ? '%s'", baseColumn, nestedPath)
60+
: String.format("NOT (%s ? '%s')", baseColumn, nestedPath);
61+
}
62+
63+
case SCALAR:
64+
default:
65+
// Regular scalar fields - use standard NULL checks
66+
return parsedRhs
67+
? String.format("%s IS NOT NULL", parsedLhs)
68+
: String.format("%s IS NULL", parsedLhs);
69+
}
70+
}
71+
72+
private String wrapWithDoubleQuotes(String identifier) {
73+
return "\"" + identifier + "\"";
1574
}
1675
}

0 commit comments

Comments
 (0)