Skip to content

Commit 828b73c

Browse files
committed
Optimise queries for indices
1 parent e1a3671 commit 828b73c

File tree

6 files changed

+362
-56
lines changed

6 files changed

+362
-56
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter;
22

33
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
4+
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
45
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
56
import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.PostgresFieldTypeDetector.FieldCategory;
67

@@ -13,11 +14,11 @@ public String parse(
1314
// If true:
1415
// Regular fields -> IS NOT NULL
1516
// Arrays -> IS NOT NULL and cardinality(...) > 0,
16-
// JSONB arrays: IS NOT NULL and jsonb_array_length(...) > 0
17+
// JSONB arrays: Optimized GIN index query with containment check
1718
// If false:
1819
// Regular fields -> IS NULL
1920
// Arrays -> IS NULL OR cardinality(...) = 0,
20-
// JSONB arrays: IS NULL OR (jsonb_typeof(%s) = 'array' AND jsonb_array_length(...) = 0)
21+
// JSONB arrays: COALESCE with array length check
2122
final boolean parsedRhs = !ConstantExpression.of(false).equals(expression.getRhs());
2223

2324
FieldCategory category = expression.getLhs().accept(new PostgresFieldTypeDetector());
@@ -26,25 +27,49 @@ public String parse(
2627
case ARRAY:
2728
// First-class PostgreSQL array columns (text[], int[], etc.)
2829
return parsedRhs
29-
? String.format("(%s IS NOT NULL AND cardinality(%s) > 0)", parsedLhs, parsedLhs)
30-
: String.format("(%s IS NULL OR cardinality(%s) = 0)", parsedLhs, parsedLhs);
30+
// No explicit IS NOT NULL check is needed: cardinality(NULL) is NULL, so the comparison
31+
// NULL > 0 is not true and rows with a NULL array are excluded from the result set
32+
? String.format("(cardinality(%s) > 0)", parsedLhs)
33+
: String.format("COALESCE(cardinality(%s), 0) = 0", parsedLhs);
3134

3235
case JSONB_ARRAY:
33-
// Arrays inside JSONB columns
34-
return parsedRhs
35-
? String.format(
36-
"(%s IS NOT NULL AND jsonb_typeof(%s) = 'array' AND jsonb_array_length(%s) > 0)",
37-
parsedLhs, parsedLhs, parsedLhs)
38-
: String.format(
39-
"(%s IS NULL OR (jsonb_typeof(%s) = 'array' AND jsonb_array_length(%s) = 0))",
40-
parsedLhs, parsedLhs, parsedLhs);
36+
{
37+
JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) expression.getLhs();
38+
String baseColumn = wrapWithDoubleQuotes(jsonExpr.getColumnName());
39+
String nestedPath = String.join(".", jsonExpr.getJsonPath());
40+
return parsedRhs
41+
// The containment check only matches when the field is a JSON array, and it can use the
// GIN index on the parent JSONB column
42+
? String.format(
43+
"(%s @> '{\"" + nestedPath + "\": []}' AND jsonb_array_length(%s) > 0)",
44+
baseColumn,
45+
parsedLhs)
46+
// Compare the number of elements in the JSONB array to 0, treating a NULL array as
47+
// having 0 elements
48+
: String.format("COALESCE(jsonb_array_length(%s), 0) = 0", parsedLhs);
49+
}
50+
51+
case JSONB_SCALAR:
52+
{
53+
// JSONB scalar fields - use ? operator for GIN index optimization
54+
JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) expression.getLhs();
55+
String baseColumn = wrapWithDoubleQuotes(jsonExpr.getColumnName());
56+
String nestedPath = String.join(".", jsonExpr.getJsonPath());
57+
58+
return parsedRhs
59+
? String.format("%s ? '%s'", baseColumn, nestedPath)
60+
: String.format("NOT (%s ? '%s')", baseColumn, nestedPath);
61+
}
4162

4263
case SCALAR:
4364
default:
44-
// Regular scalar fields
65+
// Regular scalar fields - use standard NULL checks
4566
return parsedRhs
4667
? String.format("%s IS NOT NULL", parsedLhs)
4768
: String.format("%s IS NULL", parsedLhs);
4869
}
4970
}
71+
72+
private String wrapWithDoubleQuotes(String identifier) {
73+
return "\"" + identifier + "\"";
74+
}
5075
}

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresFieldTypeDetector.java

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,28 +14,32 @@
1414
/**
1515
* Visitor to detect the category of a field expression for array-aware SQL generation.
1616
*
17-
* <p>Categorizes fields into three types:
17+
* <p>Categorizes fields into four types:
1818
*
1919
* <ul>
20-
* <li><b>SCALAR:</b> Regular fields and JSON primitives (strings, numbers, booleans, objects)
21-
* <li><b>POSTGRES_ARRAY:</b> Native PostgreSQL arrays (text[], integer[], boolean[], etc.)
20+
* <li><b>SCALAR:</b> Regular non-JSON fields
21+
* <li><b>ARRAY:</b> Native PostgreSQL arrays (text[], integer[], boolean[], etc.)
22+
* <li><b>JSONB_SCALAR:</b> Scalar fields inside JSONB columns (strings, numbers, booleans,
23+
* objects)
2224
* <li><b>JSONB_ARRAY:</b> Arrays inside JSONB columns with JsonFieldType annotation
2325
* </ul>
2426
*
2527
* <p>This categorization is used by EXISTS/NOT_EXISTS parsers to generate appropriate SQL:
2628
*
2729
* <ul>
2830
* <li>SCALAR: {@code IS NOT NULL / IS NULL}
29-
* <li>POSTGRES_ARRAY: {@code IS NOT NULL AND cardinality(...) > 0}
30-
* <li>JSONB_ARRAY: {@code IS NOT NULL AND jsonb_array_length(...) > 0}
31+
* <li>ARRAY: {@code cardinality(...) > 0}
32+
* <li>JSONB_SCALAR: {@code "col" ? 'field'} (uses GIN index)
33+
* <li>JSONB_ARRAY: {@code "col" @> '{"field": []}' AND jsonb_array_length(...) > 0} (uses GIN index)
3134
* </ul>
3235
*/
3336
class PostgresFieldTypeDetector implements SelectTypeExpressionVisitor {
3437

3538
/** Field category for determining appropriate SQL generation strategy */
3639
enum FieldCategory {
37-
SCALAR, // Regular fields and JSON primitives
40+
SCALAR, // Regular non-JSON fields
3841
ARRAY, // Native PostgreSQL arrays (text[], int[], etc.)
42+
JSONB_SCALAR, // Scalar fields inside JSONB columns
3943
JSONB_ARRAY // Arrays inside JSONB columns
4044
}
4145

@@ -55,7 +59,7 @@ public FieldCategory visit(JsonIdentifierExpression expression) {
5559
|| type == JsonFieldType.BOOLEAN_ARRAY
5660
|| type == JsonFieldType.OBJECT_ARRAY)
5761
.map(type -> FieldCategory.JSONB_ARRAY)
58-
.orElse(FieldCategory.SCALAR);
62+
.orElse(FieldCategory.JSONB_SCALAR);
5963
}
6064

6165
@Override
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter;
22

33
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
4+
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
45
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
56
import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.PostgresFieldTypeDetector.FieldCategory;
67

@@ -13,11 +14,11 @@ public String parse(
1314
// If true (RHS = false):
1415
// Regular fields -> IS NOT NULL
1516
// Arrays -> IS NOT NULL AND cardinality(...) > 0
16-
// JSONB arrays: IS NOT NULL AND jsonb_typeof(%s) = 'array' AND jsonb_array_length(...) > 0
17+
// JSONB arrays: Optimized GIN index query with containment check
1718
// If false (RHS = true or other):
1819
// Regular fields -> IS NULL
1920
// Arrays -> IS NULL OR cardinality(...) = 0
20-
// JSONB arrays: IS NULL OR (jsonb_typeof(%s) = 'array' AND jsonb_array_length(...) = 0)
21+
// JSONB arrays: COALESCE with array length check
2122
final boolean parsedRhs = ConstantExpression.of(false).equals(expression.getRhs());
2223

2324
FieldCategory category = expression.getLhs().accept(new PostgresFieldTypeDetector());
@@ -28,24 +29,52 @@ public String parse(
2829
// at-least 1 element in it (so exclude NULL or empty arrays). This is to match Mongo's
2930
// behavior
3031
return parsedRhs
31-
? String.format("(%s IS NOT NULL AND cardinality(%s) > 0)", parsedLhs, parsedLhs)
32-
: String.format("(%s IS NULL OR cardinality(%s) = 0)", parsedLhs, parsedLhs);
32+
? String.format("(cardinality(%s) > 0)", parsedLhs)
33+
// More efficient than (%s IS NULL OR cardinality(%s) = 0), as we can create
34+
// an index on the COALESCE expression itself, which can then be answered with a single
35+
// index seek rather than the two index seeks needed by the OR query
36+
: String.format("COALESCE(cardinality(%s), 0) = 0", parsedLhs);
3337

3438
case JSONB_ARRAY:
35-
return parsedRhs
36-
? String.format(
37-
"(%s IS NOT NULL AND jsonb_typeof(%s) = 'array' AND jsonb_array_length(%s) > 0)",
38-
parsedLhs, parsedLhs, parsedLhs)
39-
: String.format(
40-
"(%s IS NULL OR (jsonb_typeof(%s) = 'array' AND jsonb_array_length(%s) = 0))",
41-
parsedLhs, parsedLhs, parsedLhs);
39+
{
40+
// Arrays inside JSONB columns - use optimized GIN index queries
41+
JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) expression.getLhs();
42+
String baseColumn = wrapWithDoubleQuotes(jsonExpr.getColumnName());
43+
String nestedPath = String.join(".", jsonExpr.getJsonPath());
44+
45+
return parsedRhs
46+
? String.format(
47+
"(%s @> '{\"" + nestedPath + "\": []}' AND jsonb_array_length(%s) > 0)",
48+
baseColumn,
49+
parsedLhs)
50+
: String.format("COALESCE(jsonb_array_length(%s), 0) = 0", parsedLhs);
51+
}
52+
53+
case JSONB_SCALAR:
54+
{
55+
// JSONB scalar fields - use ? operator for GIN index optimization
56+
JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) expression.getLhs();
57+
String baseColumn = wrapWithDoubleQuotes(jsonExpr.getColumnName());
58+
String nestedPath = String.join(".", jsonExpr.getJsonPath());
59+
60+
return parsedRhs
61+
// Uses the GIN index on the parent JSONB col
62+
? String.format("%s ? '%s'", baseColumn, nestedPath)
63+
// Does not use the GIN index, but is computationally cheaper than doing an IS
64+
// NULL check
65+
: String.format("NOT (%s ? '%s')", baseColumn, nestedPath);
66+
}
4267

4368
case SCALAR:
4469
default:
45-
// Regular scalar fields
70+
// Regular scalar fields - use standard NULL checks
4671
return parsedRhs
4772
? String.format("%s IS NOT NULL", parsedLhs)
4873
: String.format("%s IS NULL", parsedLhs);
4974
}
5075
}
76+
77+
private String wrapWithDoubleQuotes(String identifier) {
78+
return "\"" + identifier + "\"";
79+
}
5180
}

0 commit comments

Comments
 (0)