Skip to content

Commit da884dd

Browse files
committed
contains any?
1 parent af125e7 commit da884dd

File tree

2 files changed

+119
-0
lines changed

2 files changed

+119
-0
lines changed

tsl/src/compression/batch_metadata_builder_bloom1.c

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,123 @@ bloom1_contains(PG_FUNCTION_ARGS)
469469
PG_RETURN_BOOL(true);
470470
}
471471

472+
static int
473+
uint64_qsort_cmp(const void *a, const void *b)
474+
{
475+
return pg_cmp_u64(*(const uint64 *) a, *(const uint64 *) b);
476+
}
477+
478+
Datum
479+
bloom1_contains_any(PG_FUNCTION_ARGS)
480+
{
481+
/*
482+
* This function is not strict, because if we don't have a bloom filter, this
483+
* means the condition can potentially be true.
484+
*/
485+
if (PG_ARGISNULL(0))
486+
{
487+
PG_RETURN_BOOL(true);
488+
}
489+
490+
/*
491+
* A null value cannot match the equality condition, although this probably
492+
* should be optimized away by the planner.
493+
*/
494+
if (PG_ARGISNULL(1))
495+
{
496+
PG_RETURN_BOOL(false);
497+
}
498+
499+
ArrayType *arr = PG_GETARG_ARRAYTYPE_P(1);
500+
int nelems;
501+
Datum *values;
502+
bool *nulls;
503+
504+
Oid elem_type = ARR_ELEMTYPE(arr);
505+
506+
FmgrInfo *finfo = NULL;
507+
PGFunction hash_fn = bloom1_get_hash_function(elem_type, &finfo);
508+
if (hash_fn == NULL)
509+
ereport(ERROR,
510+
(errcode(ERRCODE_DATA_EXCEPTION),
511+
errmsg("type %s lacks an extended hash function",
512+
format_type_be(elem_type))));
513+
514+
int16 typlen; bool typbyval; char typalign;
515+
get_typlenbyvalalign(elem_type, &typlen, &typbyval, &typalign);
516+
deconstruct_array(arr, elem_type, typlen, typbyval, typalign,
517+
&values, &nulls, &nelems);
518+
519+
if (nelems == 0)
520+
PG_RETURN_BOOL(false);
521+
522+
/* --- hash elements -------------------------------------------------- */
523+
#if FLOAT8PASSBYVAL
524+
uint64 *hashes = values;
525+
#else
526+
uint64 *hashes = palloc(sizeof(uint64) * nelems);
527+
#endif
528+
529+
int valid = 0;
530+
531+
for (int i = 0; i < nelems; i++)
532+
{
533+
if (nulls[i])
534+
{
535+
/*
536+
* A null value cannot match the equality condition.
537+
*/
538+
continue;
539+
}
540+
hashes[valid++] = calculate_hash(hash_fn, finfo, values[i]);
541+
}
542+
543+
if (valid == 0)
544+
{
545+
/*
546+
* No non-null elements.
547+
*/
548+
PG_RETURN_BOOL(false);
549+
}
550+
551+
/*
552+
* Sort the hashes for cache-friendly probing.
553+
*/
554+
pg_qsort(hashes, valid, sizeof(uint64), uint64_qsort_cmp);
555+
556+
/* Probe the bloom filter. */
557+
struct varlena *bloom = PG_GETARG_VARLENA_P(0);
558+
const uint64 *words = (const uint64 *) bloom1_words_buf(bloom);
559+
uint32 num_bits = bloom1_num_bits(bloom); /* power of two */
560+
uint32 abs_mask = num_bits - 1;
561+
uint32 word_bits = sizeof(*words) * 8;
562+
uint32 word_shift = pg_leftmost_one_pos32(word_bits);
563+
uint32 word_mask = word_bits - 1;
564+
565+
for (int i = 0; i < valid; i++)
566+
{
567+
uint64 h = hashes[i];
568+
bool match = true;
569+
570+
for (int k = 0; k < BLOOM1_HASHES; k++)
571+
{
572+
uint32 bit = bloom1_get_one_offset(h, k) & abs_mask;
573+
uint32 word = bit >> word_shift;
574+
uint32 off = bit & word_mask;
575+
576+
if ((words[word] & (UINT64_C(1) << off)) == 0)
577+
{
578+
match = false;
579+
break;
580+
}
581+
}
582+
if (match)
583+
PG_RETURN_BOOL(true); /* any element present */
584+
}
585+
586+
PG_RETURN_BOOL(false);
587+
}
588+
472589
static int
473590
bloom1_varlena_alloc_size(int num_bits)
474591
{

tsl/src/compression/sparse_index_bloom1.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,6 @@
99

1010
/* fmgr-callable bloom1 filter check; defined in batch_metadata_builder_bloom1.c. */
Datum bloom1_contains(PG_FUNCTION_ARGS);
1111

12+
/*
 * fmgr-callable check of an array against a bloom1 filter: argument 0 is the
 * filter, argument 1 the array. Returns a boolean telling whether any non-null
 * array element may be present in the filter; a NULL filter yields true.
 */
Datum bloom1_contains_any(PG_FUNCTION_ARGS);
13+
1214
/*
 * Returns the hash function that bloom1 filters use for the given type.
 * bloom1_contains_any() treats a NULL result as "type lacks an extended hash
 * function" and passes *finfo along when computing hashes.
 * NOTE(review): the exact contract for *finfo (when it is set, who owns it)
 * is not visible from this header -- confirm against the definition.
 */
PGFunction bloom1_get_hash_function(Oid type, FmgrInfo **finfo);

0 commit comments

Comments
 (0)