Skip to content

Commit f081da6

Browse files
authored
feat: improve scan IO profile metrics (#18975)
* feat(profile): split scan io metrics and show in explain * revert: only show non-zero scan io metrics * fix: always display scan IO metrics in EXPLAIN ANALYZE for consistent output - Add bytes scanned from local cache to test expectations - Make scan IO metrics (remote/local/memory) always display even when zero to ensure consistent output format for sqllogic tests * fix: only display scan IO metrics when any of them is non-zero This ensures: - Fuse table scans show all three scan IO metrics together (remote/local/memory) - System tables (like numbers) don't show these metrics since they don't use Fuse storage * fix: use ProfileStatisticsName enum for scan IO metrics matching Use enum comparison instead of string comparison for more reliable matching * fix: display all scan IO metrics together when any is non-zero The fix was applied to the correct file: physical_format.rs in databend-query. The previous fix in sql/executor/format.rs was ineffective because that file is not compiled (not declared in mod.rs). When any of the three scan IO metrics (remote/local cache/memory cache) is non-zero, all three will be displayed. This ensures consistent output in EXPLAIN ANALYZE for Fuse table scans.
1 parent 05e8630 commit f081da6

File tree

8 files changed

+100
-21
lines changed

8 files changed

+100
-21
lines changed

src/common/base/src/runtime/profile/profiles.rs

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,11 @@ pub enum ProfileStatisticsName {
3535
OutputRows,
3636
OutputBytes,
3737
ScanBytes,
38-
ScanCacheBytes,
3938
ScanPartitions,
39+
ScanBytesFromRemote,
40+
ScanBytesFromLocalDisk,
41+
ScanBytesFromDataCache,
42+
ScanBytesFromMemory,
4043

4144
RemoteSpillWriteCount,
4245
RemoteSpillWriteBytes,
@@ -190,20 +193,41 @@ pub fn get_statistics_desc() -> Arc<BTreeMap<ProfileStatisticsName, ProfileDesc>
190193
unit: StatisticsUnit::Bytes,
191194
plain_statistics: true,
192195
}),
193-
(ProfileStatisticsName::ScanCacheBytes, ProfileDesc {
194-
display_name: "bytes scanned from cache",
195-
desc: "The bytes scanned from cache of query",
196-
index: ProfileStatisticsName::ScanCacheBytes as usize,
197-
unit: StatisticsUnit::Bytes,
198-
plain_statistics: true,
199-
}),
200196
(ProfileStatisticsName::ScanPartitions, ProfileDesc {
201197
display_name: "partitions scanned",
202198
desc: "The partitions scanned of query",
203199
index: ProfileStatisticsName::ScanPartitions as usize,
204200
unit: StatisticsUnit::Count,
205201
plain_statistics: true,
206202
}),
203+
(ProfileStatisticsName::ScanBytesFromRemote, ProfileDesc {
204+
display_name: "read from remote",
205+
desc: "The bytes read from remote storage",
206+
index: ProfileStatisticsName::ScanBytesFromRemote as usize,
207+
unit: StatisticsUnit::Bytes,
208+
plain_statistics: true,
209+
}),
210+
(ProfileStatisticsName::ScanBytesFromLocalDisk, ProfileDesc {
211+
display_name: "read from local disk",
212+
desc: "The bytes read from local disk cache",
213+
index: ProfileStatisticsName::ScanBytesFromLocalDisk as usize,
214+
unit: StatisticsUnit::Bytes,
215+
plain_statistics: true,
216+
}),
217+
(ProfileStatisticsName::ScanBytesFromDataCache, ProfileDesc {
218+
display_name: "read from data cache",
219+
desc: "The bytes read from data cache memory",
220+
index: ProfileStatisticsName::ScanBytesFromDataCache as usize,
221+
unit: StatisticsUnit::Bytes,
222+
plain_statistics: true,
223+
}),
224+
(ProfileStatisticsName::ScanBytesFromMemory, ProfileDesc {
225+
display_name: "read from memory cache",
226+
desc: "The bytes read from memory cache",
227+
index: ProfileStatisticsName::ScanBytesFromMemory as usize,
228+
unit: StatisticsUnit::Bytes,
229+
plain_statistics: true,
230+
}),
207231
(ProfileStatisticsName::RemoteSpillWriteCount, ProfileDesc {
208232
display_name: "numbers remote spilled by write",
209233
desc: "The number of remote spilled by write",

src/query/sql/src/executor/format.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,10 @@ impl PhysicalPlan {
138138
};
139139

140140
Ok(FormatTreeNode::with_children(
141-
format!("HashJoin: {} estimated_rows: {}", plan.join_type, estimated_rows),
141+
format!(
142+
"HashJoin: {} estimated_rows: {}",
143+
plan.join_type, estimated_rows
144+
),
142145
children,
143146
))
144147
}

src/query/storages/common/cache/src/providers/disk_cache_builder.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,8 @@ impl CacheAccessor for DiskCacheAccessor {
118118
metrics_inc_cache_access_count(1, &self.name);
119119
let k = k.as_ref();
120120
if let Some(item) = self.lru_disk_cache.get(k) {
121-
Profile::record_usize_profile(ProfileStatisticsName::ScanCacheBytes, item.len());
121+
let size = item.len();
122+
Profile::record_usize_profile(ProfileStatisticsName::ScanBytesFromLocalDisk, size);
122123
metrics_inc_cache_hit_count(1, &self.name);
123124
Some(item)
124125
} else {

src/query/storages/common/cache/src/providers/hybrid_cache.rs

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
use std::sync::Arc;
1616

1717
use bytes::Bytes;
18+
use databend_common_base::runtime::profile::Profile;
19+
use databend_common_base::runtime::profile::ProfileStatisticsName;
1820
use databend_common_exception::ErrorCode;
1921
use databend_common_metrics::cache::metrics_inc_cache_miss_bytes;
2022
use log::warn;
@@ -152,10 +154,14 @@ where
152154

153155
fn get<Q: AsRef<str>>(&self, k: Q) -> Option<Arc<Self::V>> {
154156
if let Some(item) = self.memory_cache.get(k.as_ref()) {
157+
// Record memory cache hit
158+
// Note: The actual size recording happens in memory_cache.get()
155159
// try putting it bach to on-disk cache if necessary
156160
self.insert_to_disk_cache_if_necessary(k.as_ref(), item.as_ref());
157161
Some(item)
158162
} else if let Some(bytes) = self.disk_cache.get(k.as_ref()) {
163+
// Record disk cache hit
164+
// Note: The actual size recording happens in disk_cache.get()
159165
let bytes = bytes.as_ref().clone();
160166
match bytes.try_into() {
161167
Ok(v) => Some(self.memory_cache.insert(k.as_ref().to_owned(), v)),
@@ -169,21 +175,45 @@ where
169175
}
170176
}
171177
} else {
172-
// Cache Miss
178+
// Cache Miss - will need to read from remote
173179
None
174180
}
175181
}
176182

177183
fn get_sized<Q: AsRef<str>>(&self, k: Q, len: u64) -> Option<Arc<Self::V>> {
178-
let Some(cached_value) = self.get(k) else {
179-
// Both in-memory and on-disk cache are missed, record it
180-
let in_memory_cache_name = self.memory_cache.name();
181-
let on_disk_cache_name = self.disk_cache.name();
182-
metrics_inc_cache_miss_bytes(len, in_memory_cache_name);
183-
metrics_inc_cache_miss_bytes(len, on_disk_cache_name);
184-
return None;
185-
};
186-
Some(cached_value)
184+
// First, check in-memory cache
185+
if let Some(item) = self.memory_cache.get(k.as_ref()) {
186+
// Record bytes read from data cache memory
187+
Profile::record_usize_profile(
188+
ProfileStatisticsName::ScanBytesFromDataCache,
189+
len as usize,
190+
);
191+
// try putting it back to on-disk cache if necessary
192+
self.insert_to_disk_cache_if_necessary(k.as_ref(), item.as_ref());
193+
return Some(item);
194+
}
195+
196+
// Then, check on-disk cache (ScanBytesFromLocalDisk is recorded inside disk_cache.get)
197+
if let Some(bytes) = self.disk_cache.get(k.as_ref()) {
198+
let bytes = bytes.as_ref().clone();
199+
match bytes.try_into() {
200+
Ok(v) => return Some(self.memory_cache.insert(k.as_ref().to_owned(), v)),
201+
Err(e) => {
202+
let key = k.as_ref();
203+
// Disk cache crc is correct, but failed to deserialize.
204+
// Likely the serialization format has been changed, evict it.
205+
warn!("failed to decode cache value, key {key}, {}", e);
206+
self.disk_cache.evict(key);
207+
}
208+
}
209+
}
210+
211+
// Both in-memory and on-disk cache are missed, record it
212+
let in_memory_cache_name = self.memory_cache.name();
213+
let on_disk_cache_name = self.disk_cache.name();
214+
metrics_inc_cache_miss_bytes(len, in_memory_cache_name);
215+
metrics_inc_cache_miss_bytes(len, on_disk_cache_name);
216+
None
187217
}
188218

189219
fn insert(&self, key: String, value: Self::V) -> Arc<Self::V> {

src/query/storages/common/cache/src/providers/memory_cache.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,9 @@ mod impls {
9797
let mut guard = self.inner.write();
9898
guard
9999
.get(k.as_ref())
100-
.map(|cache_value: &CacheValue<V>| cache_value.get_inner())
100+
.map(|cache_value| cache_value.get_inner())
101101
};
102+
102103
if v.is_none() {
103104
metrics_inc_cache_miss_count(1, &self.name);
104105
} else {

src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
use std::collections::HashMap;
1616
use std::collections::HashSet;
1717

18+
use databend_common_base::runtime::profile::Profile;
19+
use databend_common_base::runtime::profile::ProfileStatisticsName;
1820
use databend_common_exception::Result;
1921
use databend_common_expression::ColumnId;
2022
use databend_common_metrics::storage::*;
@@ -67,6 +69,11 @@ impl BlockReader {
6769
// first, check in memory table data cache
6870
// column_array_cache
6971
if let Some(cache_array) = column_array_cache.get_sized(&column_cache_key, len) {
72+
// Record bytes scanned from memory cache (table data only)
73+
Profile::record_usize_profile(
74+
ProfileStatisticsName::ScanBytesFromMemory,
75+
len as usize,
76+
);
7077
cached_column_array.push((*column_id, cache_array));
7178
continue;
7279
}
@@ -87,6 +94,12 @@ impl BlockReader {
8794
metrics_inc_remote_io_seeks(1);
8895
metrics_inc_remote_io_read_bytes(len);
8996
}
97+
98+
// Record bytes scanned from remote storage
99+
Profile::record_usize_profile(
100+
ProfileStatisticsName::ScanBytesFromRemote,
101+
len as usize,
102+
);
90103
}
91104
}
92105

tests/sqllogictests/suites/mode/cluster/explain_analyze.test

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ Exchange
128128
├── output rows: 6
129129
├── output bytes: 120.00 B
130130
├── bytes scanned: 120.00 B
131+
├── read from remote: <slt:ignore>
131132
├── runtime filter inlist/min-max time: <slt:ignore>
132133
├── table: default.default.article
133134
├── scan id: 0
@@ -159,6 +160,7 @@ Exchange
159160
├── output rows: 4
160161
├── output bytes: 80.00 B
161162
├── bytes scanned: 80.00 B
163+
├── read from remote: <slt:ignore>
162164
├── runtime filter inlist/min-max time: <slt:ignore>
163165
├── table: default.default.article
164166
├── scan id: 0
@@ -208,6 +210,7 @@ Exchange
208210
│ ├── output rows: 1
209211
│ ├── output bytes: <slt:ignore>
210212
│ ├── bytes scanned: <slt:ignore>
213+
│ ├── read from remote: <slt:ignore>
211214
│ ├── runtime filter inlist/min-max time: <slt:ignore>
212215
│ ├── table: default.default.author
213216
│ ├── scan id: 1
@@ -225,6 +228,7 @@ Exchange
225228
├── output rows: 6
226229
├── output bytes: 120.00 B
227230
├── bytes scanned: 120.00 B
231+
├── read from remote: <slt:ignore>
228232
├── runtime filter inlist/min-max time: <slt:ignore>
229233
├── table: default.default.article
230234
├── scan id: 0

tests/sqllogictests/suites/mode/standalone/explain/explain_analyze.test

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ Filter
138138
├── output rows: 4
139139
├── output bytes: 80.00 B
140140
├── bytes scanned: 80.00 B
141+
├── read from remote: <slt:ignore>
141142
├── runtime filter inlist/min-max time: <slt:ignore>
142143
├── table: default.default.article
143144
├── scan id: 0
@@ -176,6 +177,7 @@ HashJoin
176177
│ ├── output rows: 1
177178
│ ├── output bytes: <slt:ignore>
178179
│ ├── bytes scanned: <slt:ignore>
180+
│ ├── read from remote: <slt:ignore>
179181
│ ├── runtime filter inlist/min-max time: <slt:ignore>
180182
│ ├── table: default.default.author
181183
│ ├── scan id: 1
@@ -193,6 +195,7 @@ HashJoin
193195
├── output rows: 6
194196
├── output bytes: 120.00 B
195197
├── bytes scanned: 120.00 B
198+
├── read from remote: <slt:ignore>
196199
├── runtime filter inlist/min-max time: <slt:ignore>
197200
├── table: default.default.article
198201
├── scan id: 0

0 commit comments

Comments
 (0)