Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package org.hypertrace.core.documentstore.expression.impl;

/**
 * Declares the kind of value stored at a JSON field in a flat collection.
 *
 * <p>Constants are grouped as primitives, arrays and plain objects; the declaration order is kept
 * stable.
 */
public enum JsonFieldType {
  // Primitive kinds
  STRING,
  NUMBER,
  BOOLEAN,

  // Array kinds, named after their element type
  STRING_ARRAY,
  NUMBER_ARRAY,
  BOOLEAN_ARRAY,
  OBJECT_ARRAY,

  // Nested JSON object
  OBJECT
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.hypertrace.core.documentstore.expression.impl;

import java.util.List;
import java.util.Optional;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import org.hypertrace.core.documentstore.parser.FieldTransformationVisitor;
Expand All @@ -20,6 +21,7 @@ public class JsonIdentifierExpression extends IdentifierExpression {

String columnName; // e.g., "customAttr" (the top-level JSONB column)
List<String> jsonPath; // e.g., ["myAttribute", "nestedField"]
// Optional (null when not specified): the JsonFieldType of the addressed field,
// e.g. STRING or STRING_ARRAY. Exposed to callers as an Optional via getFieldType().
JsonFieldType fieldType;

public static JsonIdentifierExpression of(final String columnName) {
throw new IllegalArgumentException(
Expand All @@ -34,7 +36,20 @@ public static JsonIdentifierExpression of(final String columnName, final String.
return of(columnName, List.of(pathElements));
}

/**
 * Creates a typed JSON identifier from a column name and a varargs JSON path.
 *
 * @param columnName the top-level column holding the JSON document
 * @param fieldType the JSON type of the addressed field
 * @param pathElements the path segments below the column; at least one is required
 * @return the expression built by the list-based factory
 * @throws IllegalArgumentException if {@code pathElements} is null or empty
 */
public static JsonIdentifierExpression of(
    final String columnName, final JsonFieldType fieldType, final String... pathElements) {
  final boolean hasPath = pathElements != null && pathElements.length > 0;
  if (!hasPath) {
    throw new IllegalArgumentException("JSON path cannot be null or empty");
  }

  final List<String> jsonPath = List.of(pathElements);
  return of(columnName, fieldType, jsonPath);
}

/**
 * Creates a JSON identifier without a declared field type.
 *
 * @param columnName the top-level column holding the JSON document
 * @param jsonPath the path segments below the column
 * @return an expression whose field type is left unspecified
 */
public static JsonIdentifierExpression of(final String columnName, final List<String> jsonPath) {
  // No type information is available through this overload.
  final JsonFieldType unspecifiedType = null;
  return of(columnName, unspecifiedType, jsonPath);
}

public static JsonIdentifierExpression of(
final String columnName, final JsonFieldType fieldType, final List<String> jsonPath) {
BasicPostgresSecurityValidator.getDefault().validateIdentifier(columnName);

if (jsonPath == null || jsonPath.isEmpty()) {
Expand All @@ -47,13 +62,20 @@ public static JsonIdentifierExpression of(final String columnName, final List<St

// Construct full name for compatibility: "customAttr.myAttribute"
String fullName = columnName + "." + String.join(".", unmodifiablePath);
return new JsonIdentifierExpression(fullName, columnName, unmodifiablePath);
return new JsonIdentifierExpression(fullName, columnName, unmodifiablePath, fieldType);
}

/**
 * Initializes the expression with its full name, column, JSON path and optional field type.
 *
 * @param name the full identifier name handed to the superclass
 * @param columnName the top-level column holding the JSON document
 * @param jsonPath the path segments below the column
 * @param fieldType the JSON type of the addressed field, or null when not specified
 */
protected JsonIdentifierExpression(
    String name, String columnName, List<String> jsonPath, JsonFieldType fieldType) {
  super(name);
  this.columnName = columnName;
  this.jsonPath = jsonPath;
  this.fieldType = fieldType;
}

/**
 * Exposes the declared JSON field type.
 *
 * @return the field type wrapped in an Optional, or an empty Optional when none was specified
 */
public Optional<JsonFieldType> getFieldType() {
  if (fieldType == null) {
    return Optional.empty();
  }
  return Optional.of(fieldType);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,21 @@
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression.DocumentConstantExpression;
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.JsonFieldType;
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
import org.hypertrace.core.documentstore.parser.SelectTypeExpressionVisitor;
import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.nonjson.field.PostgresInRelationalFilterParserArrayField;
import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.nonjson.field.PostgresInRelationalFilterParserScalarField;

class PostgresInParserSelector implements SelectTypeExpressionVisitor {

// Parsers for different expression types
private static final PostgresInRelationalFilterParserInterface jsonFieldInFilterParser =
new PostgresInRelationalFilterParser();
new PostgresInRelationalFilterParser(); // Fallback for JSON without type info
private static final PostgresInRelationalFilterParserInterface jsonPrimitiveInFilterParser =
new PostgresInRelationalFilterParserJsonPrimitive(); // Optimized for JSON primitives
private static final PostgresInRelationalFilterParserInterface jsonArrayInFilterParser =
new PostgresInRelationalFilterParserJsonArray(); // Optimized for JSON arrays
private static final PostgresInRelationalFilterParserInterface scalarFieldInFilterParser =
new PostgresInRelationalFilterParserScalarField();
private static final PostgresInRelationalFilterParserInterface arrayFieldInFilterParser =
Expand All @@ -29,7 +35,28 @@ class PostgresInParserSelector implements SelectTypeExpressionVisitor {

/**
 * Selects the IN-filter parser for a JSON identifier based on its declared field type.
 *
 * @param expression the JSON identifier on the left-hand side of the IN filter
 * @return the primitive parser for STRING/NUMBER/BOOLEAN, the array parser for *_ARRAY types
 * @throws IllegalArgumentException if the expression carries no field type, or the type is not
 *     handled by the switch
 * @throws UnsupportedOperationException if the field type is OBJECT
 */
@Override
public PostgresInRelationalFilterParserInterface visit(JsonIdentifierExpression expression) {
  // JsonFieldType is required for optimized SQL generation
  final JsonFieldType fieldType = getFieldType(expression);

  switch (fieldType) {
    case STRING:
    case NUMBER:
    case BOOLEAN:
      // Primitives: use ->> (extract as text) with appropriate casting
      return jsonPrimitiveInFilterParser;
    case STRING_ARRAY:
    case NUMBER_ARRAY:
    case BOOLEAN_ARRAY:
    case OBJECT_ARRAY:
      // Typed arrays: use -> with @> and typed jsonb_build_array
      return jsonArrayInFilterParser;
    case OBJECT:
      // Objects: use -> with @> (future: needs separate parser)
      throw new UnsupportedOperationException(
          "IN operator on OBJECT type is not yet supported. Use primitive or array types.");
    default:
      throw new IllegalArgumentException("Unsupported JsonFieldType: " + fieldType);
  }
}

@Override
Expand Down Expand Up @@ -68,4 +95,14 @@ public PostgresInRelationalFilterParserInterface visit(FunctionExpression expres
public PostgresInRelationalFilterParserInterface visit(AliasedIdentifierExpression expression) {
  // Flat collections use the scalar-field parser; everything else uses the JSON field parser.
  if (isFlatCollection) {
    return scalarFieldInFilterParser;
  }
  return jsonFieldInFilterParser;
}

/**
 * Reads the field type that an IN filter on a JSON identifier requires.
 *
 * @param expression the JSON identifier being filtered
 * @return the declared field type
 * @throws IllegalArgumentException if the expression has no field type
 */
private static JsonFieldType getFieldType(JsonIdentifierExpression expression) {
  final String missingTypeMessage =
      "JsonFieldType must be specified for JsonIdentifierExpression in IN operations. "
          + "Use JsonIdentifierExpression.of(column, JsonFieldType.*, path...)";

  return expression
      .getFieldType()
      .orElseThrow(() -> new IllegalArgumentException(missingTypeMessage));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter;

import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.hypertrace.core.documentstore.expression.impl.JsonFieldType;
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
import org.hypertrace.core.documentstore.postgres.Params;

/**
 * Optimized parser for IN operations on JSON array fields with type-specific casting.
 *
 * <p>Uses the JSONB containment operator ({@code @>}) with a typed jsonb_build_array for
 * "contains any" semantics:
 *
 * <ul>
 *   <li><b>STRING_ARRAY:</b> {@code "document" -> 'tags' @> jsonb_build_array(?::text)}
 *   <li><b>NUMBER_ARRAY:</b> {@code "document" -> 'scores' @> jsonb_build_array(?::numeric)}
 *   <li><b>BOOLEAN_ARRAY:</b> {@code "document" -> 'flags' @> jsonb_build_array(?::boolean)}
 *   <li><b>OBJECT_ARRAY:</b> {@code "document" -> 'items' @> jsonb_build_array(?::jsonb)}
 * </ul>
 *
 * <p>One containment condition is emitted per right-hand-side value and the conditions are joined
 * with {@code OR}, so the filter matches when the JSON array contains ANY of the provided values.
 */
public class PostgresInRelationalFilterParserJsonArray
    implements PostgresInRelationalFilterParserInterface {

  /**
   * Builds the SQL fragment for an IN filter whose left-hand side is a typed JSON array field.
   *
   * @param expression the relational expression; its LHS must be a {@link
   *     JsonIdentifierExpression} carrying a field type
   * @param context supplies the LHS/RHS parsers and the parameter builder
   * @return the OR-joined containment conditions, parenthesized when there is more than one
   * @throws IllegalStateException if the LHS has no field type
   * @throws IllegalArgumentException if the field type is not one of the *_ARRAY types
   */
  @Override
  public String parse(
      final RelationalExpression expression, final PostgresRelationalFilterContext context) {
    final String parsedLhs = expression.getLhs().accept(context.lhsParser());
    final Iterable<Object> parsedRhs = expression.getRhs().accept(context.rhsParser());

    // Extract field type for typed array handling (guaranteed to be present by selector)
    final JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) expression.getLhs();
    final JsonFieldType fieldType =
        jsonExpr
            .getFieldType()
            .orElseThrow(
                () ->
                    new IllegalStateException(
                        "JsonFieldType must be present - this should have been caught by the selector"));

    return prepareFilterStringForInOperator(
        parsedLhs, parsedRhs, fieldType, context.getParamsBuilder());
  }

  /**
   * Registers every RHS value as a query parameter and emits one containment condition per value.
   *
   * <p>The RHS is traversed exactly once. An empty RHS yields an empty string.
   */
  private String prepareFilterStringForInOperator(
      final String parsedLhs,
      final Iterable<Object> parsedRhs,
      final JsonFieldType fieldType,
      final Params.Builder paramsBuilder) {

    // The condition text does not depend on the value (values are bound as parameters), so it is
    // built once instead of being re-formatted for every element.
    final String containsCondition =
        String.format(
            "%s @> jsonb_build_array(?%s)", parsedLhs, getTypeCastForArray(fieldType));

    return StreamSupport.stream(parsedRhs.spliterator(), false)
        .map(
            value -> {
              paramsBuilder.addObjectParam(value);
              return containsCondition;
            })
        .collect(
            Collectors.collectingAndThen(
                Collectors.toList(),
                conditions -> {
                  final String joined = String.join(" OR ", conditions);
                  // Wrap in parentheses if multiple conditions
                  return conditions.size() > 1 ? String.format("(%s)", joined) : joined;
                }));
  }

  /**
   * Returns the PostgreSQL type cast string for jsonb_build_array elements based on array type.
   *
   * @param fieldType The JSON field type (must not be null)
   * @return Type cast string (e.g., "::text", "::numeric")
   * @throws IllegalArgumentException if {@code fieldType} is not one of the *_ARRAY types
   */
  private String getTypeCastForArray(JsonFieldType fieldType) {
    switch (fieldType) {
      case STRING_ARRAY:
        return "::text";
      case NUMBER_ARRAY:
        return "::numeric";
      case BOOLEAN_ARRAY:
        return "::boolean";
      case OBJECT_ARRAY:
        return "::jsonb";
      default:
        throw new IllegalArgumentException(
            "Unsupported array type: " + fieldType + ". Expected *_ARRAY types.");
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter;

import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.hypertrace.core.documentstore.expression.impl.JsonFieldType;
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
import org.hypertrace.core.documentstore.postgres.Params;

/**
* Optimized parser for IN operations on JSON primitive fields (string, number, boolean) with proper
* type casting.
*
* <p>Generates efficient SQL using {@code ->>} operator with appropriate PostgreSQL casting:
*
* <ul>
* <li><b>STRING:</b> {@code "document" ->> 'item' IN ('Soap', 'Shampoo')}
* <li><b>NUMBER:</b> {@code CAST("document" ->> 'price' AS NUMERIC) IN (10, 20)}
* <li><b>BOOLEAN:</b> {@code CAST("document" ->> 'active' AS BOOLEAN) IN (true, false)}
* </ul>
*
* <p>This is much more efficient than the defensive approach that checks both array and scalar
* types, and ensures correct type comparisons.
*/
public class PostgresInRelationalFilterParserJsonPrimitive
implements PostgresInRelationalFilterParserInterface {

@Override
public String parse(
final RelationalExpression expression, final PostgresRelationalFilterContext context) {
String parsedLhs = expression.getLhs().accept(context.lhsParser());
final Iterable<Object> parsedRhs = expression.getRhs().accept(context.rhsParser());

// Extract field type for proper casting (guaranteed to be present by selector)
JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) expression.getLhs();
JsonFieldType fieldType =
jsonExpr
.getFieldType()
.orElseThrow(
() ->
new IllegalStateException(
"JsonFieldType must be present - this should have been caught by the selector"));

// For JSON primitives, we need ->> (text extraction) instead of -> (jsonb extraction)
// The LHS parser generates: "props"->'brand' (returns JSONB)
// We need: "props"->>'brand' (returns TEXT)
// Replace the last -> with ->> for primitive type extraction
int lastArrowIndex = parsedLhs.lastIndexOf("->");

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is tweaking the parser to not do this a bigger lift?

if (lastArrowIndex != -1) {
parsedLhs =
parsedLhs.substring(0, lastArrowIndex) + "->>" + parsedLhs.substring(lastArrowIndex + 2);
}

return prepareFilterStringForInOperator(
parsedLhs, parsedRhs, fieldType, context.getParamsBuilder());
}

private String prepareFilterStringForInOperator(
final String parsedLhs,
final Iterable<Object> parsedRhs,
final JsonFieldType fieldType,
final Params.Builder paramsBuilder) {

String placeholders =
StreamSupport.stream(parsedRhs.spliterator(), false)
.map(
value -> {
paramsBuilder.addObjectParam(value);
return "?";
})
.collect(Collectors.joining(", "));

// Apply appropriate casting based on field type
String lhsWithCast = parsedLhs;
if (fieldType == JsonFieldType.NUMBER) {
lhsWithCast = String.format("CAST(%s AS NUMERIC)", parsedLhs);
} else if (fieldType == JsonFieldType.BOOLEAN) {
lhsWithCast = String.format("CAST(%s AS BOOLEAN)", parsedLhs);
}
// STRING or null fieldType: no casting needed

return String.format("%s IN (%s)", lhsWithCast, placeholders);
}
}
Loading
Loading