Skip to content

Commit bbbe468

Browse files
committed
remove null in bloom index builder
1 parent 87ec0be commit bbbe468

File tree

4 files changed

+11
-33
lines changed

4 files changed

+11
-33
lines changed

src/query/storages/common/index/src/bloom_index.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -398,11 +398,8 @@ impl BloomIndex {
398398
(column, None)
399399
};
400400

401-
let capacity = validity.map_or(column.len(), |v| v.true_count() + 1);
401+
let capacity = validity.map_or(column.len(), |v| v.true_count());
402402
let mut result = Vec::with_capacity(capacity);
403-
if validity.is_some() {
404-
result.push(0);
405-
}
406403
let column = T::try_downcast_column(column).unwrap();
407404
if let Some(validity) = validity {
408405
let column_iter = T::iter_column(&column);

src/query/storages/fuse/src/io/write/stream/block_builder.rs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ impl StreamBlockBuilder {
308308
} else {
309309
None
310310
};
311-
let bloom_distinct_count = bloom_index_state
311+
let mut column_distinct_count = bloom_index_state
312312
.as_ref()
313313
.map(|i| i.column_distinct_count.clone())
314314
.unwrap_or_default();
@@ -317,12 +317,10 @@ impl StreamBlockBuilder {
317317
.meta_locations
318318
.block_stats_location(&block_id);
319319
let block_stats_state = self.block_stats_builder.finalize(block_stats_location)?;
320-
let hll_distinct_count = block_stats_state
321-
.as_ref()
322-
.map_or(HashMap::new(), |i| i.column_distinct_count.clone());
323-
let col_stats = self
324-
.column_stats_state
325-
.finalize(bloom_distinct_count, hll_distinct_count)?;
320+
if let Some(state) = &block_stats_state {
321+
column_distinct_count.extend(state.column_distinct_count.clone());
322+
}
323+
let col_stats = self.column_stats_state.finalize(column_distinct_count)?;
326324

327325
let mut inverted_index_states = Vec::with_capacity(self.inverted_index_writers.len());
328326
for (i, inverted_index_writer) in std::mem::take(&mut self.inverted_index_writers)

src/query/storages/fuse/src/io/write/stream/column_statistics_state.rs

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -88,29 +88,17 @@ impl ColumnStatisticsState {
8888

8989
pub fn finalize(
9090
self,
91-
bloom_distinct_count: HashMap<ColumnId, usize>,
92-
hll_distinct_count: HashMap<ColumnId, usize>,
91+
mut column_distinct_count: HashMap<ColumnId, usize>,
9392
) -> Result<StatisticsOfColumns> {
94-
let mut distinct_count =
95-
HashMap::with_capacity(self.distinct_columns.len() + hll_distinct_count.len());
9693
for (column_id, estimator) in &self.distinct_columns {
97-
distinct_count.insert(*column_id, estimator.finalize());
94+
column_distinct_count.insert(*column_id, estimator.finalize());
9895
}
99-
distinct_count.extend(hll_distinct_count);
10096

10197
let mut statistics = StatisticsOfColumns::with_capacity(self.col_stats.len());
10298
for (id, stats) in self.col_stats {
10399
let mut col_stats = stats.finalize()?;
104-
if let Some(count) = distinct_count.get(&id) {
100+
if let Some(count) = column_distinct_count.get(&id) {
105101
col_stats.distinct_of_values = Some(*count as u64);
106-
} else if let Some(&count) = bloom_distinct_count.get(&id) {
107-
// value calculated by xor hash function include NULL, need to subtract one.
108-
let distinct_of_values = if col_stats.null_count > 0 && count > 0 {
109-
count as u64 - 1
110-
} else {
111-
count as u64
112-
};
113-
col_stats.distinct_of_values = Some(distinct_of_values);
114102
} else if col_stats.min == col_stats.max {
115103
// The bloom index skips large string columns, so there is also no need to calculate distinct values.
116104
if col_stats.min.is_null() {
@@ -188,7 +176,7 @@ mod tests {
188176
}
189177
let mut column_stats_state = ColumnStatisticsState::new(&stats_columns, &stats_columns);
190178
column_stats_state.add_block(&schema, &block)?;
191-
let stats_1 = column_stats_state.finalize(HashMap::new(), HashMap::new())?;
179+
let stats_1 = column_stats_state.finalize(HashMap::new())?;
192180

193181
assert_eq!(stats_0, stats_1);
194182
Ok(())

src/query/storages/fuse/src/statistics/column_statistic.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,7 @@ pub fn gen_columns_statistics(
124124
.as_ref()
125125
.and_then(|v| v.get(&column_id))
126126
{
127-
// value calculated by xor hash function include NULL, need to subtract one.
128-
if unset_bits > 0 && value > 0 {
129-
value as u64 - 1
130-
} else {
131-
value as u64
132-
}
127+
value as u64
133128
} else {
134129
calc_column_distinct_of_values(&col, rows)?
135130
};

0 commit comments

Comments
 (0)