Skip to content

Commit 96fb78a

Browse files
authored
Merge pull request #4 from zhyass/fix_purge
Fix purge with older version bug
2 parents c713578 + 3cec815 commit 96fb78a

File tree

10 files changed

+244
-91
lines changed

10 files changed

+244
-91
lines changed

src/query/service/tests/it/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#![allow(clippy::uninlined_format_args)]
1616
#![feature(thread_local)]
17+
#![feature(io_error_other)]
1718

1819
extern crate core;
1920

src/query/service/tests/it/storages/fuse/operations/gc.rs

Lines changed: 134 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,23 @@
1616
use std::sync::Arc;
1717

1818
use chrono::Duration;
19+
use chrono::Utc;
1920
use common_base::base::tokio;
2021
use common_catalog::table_context::TableContext;
2122
use common_exception::Result;
2223
use common_expression::DataBlock;
24+
use common_storages_factory::Table;
2325
use common_storages_fuse::io::MetaWriter;
2426
use common_storages_fuse::io::SegmentWriter;
2527
use common_storages_fuse::statistics::gen_columns_statistics;
28+
use common_storages_fuse::statistics::merge_statistics;
2629
use common_storages_fuse::FuseTable;
2730
use futures_util::TryStreamExt;
2831
use storages_common_table_meta::meta::Location;
2932
use storages_common_table_meta::meta::SegmentInfo;
3033
use storages_common_table_meta::meta::Statistics;
3134
use storages_common_table_meta::meta::TableSnapshot;
35+
use storages_common_table_meta::meta::TableSnapshotV2;
3236
use storages_common_table_meta::meta::Versioned;
3337
use uuid::Uuid;
3438

@@ -191,6 +195,7 @@ async fn test_fuse_purge_orphan_retention() -> Result<()> {
191195
fuse_table,
192196
segment_locations.clone(),
193197
Some(new_timestamp),
198+
TableSnapshot::VERSION,
194199
)
195200
.await?;
196201

@@ -200,6 +205,7 @@ async fn test_fuse_purge_orphan_retention() -> Result<()> {
200205
fuse_table,
201206
segment_locations.clone(),
202207
Some(new_timestamp),
208+
TableSnapshot::VERSION,
203209
)
204210
.await?;
205211

@@ -215,6 +221,7 @@ async fn test_fuse_purge_orphan_retention() -> Result<()> {
215221
fuse_table,
216222
segment_locations.clone(),
217223
Some(new_timestamp),
224+
TableSnapshot::VERSION,
218225
)
219226
.await?;
220227
}
@@ -243,29 +250,144 @@ async fn test_fuse_purge_orphan_retention() -> Result<()> {
243250
Ok(())
244251
}
245252

253+
/// Purge test over a snapshot chain that mixes format versions.
///
/// The test requests 3 segments with 2 blocks each from `utils::generate_segments`,
/// then builds three snapshots by hand:
/// - snapshot 0 is a `TableSnapshotV2` referencing segment 0, written with
///   `utils::write_snapshot_v2`;
/// - snapshot 1 is a `TableSnapshot` referencing segments 1 and 0, whose previous
///   snapshot is snapshot 0 at `TableSnapshotV2::VERSION`;
/// - snapshot 2 is derived from snapshot 1 via `TableSnapshot::from_previous`,
///   references segments 2, 1 and 0, and is the only one committed to the meta server.
///
/// After `do_purge`, `check_data_dir` is called with the expected counts
/// 1 snapshot, 3 segments, 6 blocks and 6 indexes.
#[tokio::test(flavor = "multi_thread")]
254+
async fn test_fuse_purge_older_version() -> Result<()> {
255+
let fixture = TestFixture::new().await;
256+
let ctx = fixture.ctx();
257+
fixture.create_default_table().await?;
258+
259+
let now = Utc::now();
260+
261+
let schema = TestFixture::default_table_schema();
262+
let mut table = fixture.latest_default_table().await?;
263+
let mut fuse_table = FuseTable::try_from_table(table.as_ref())?;
264+
let location_gen = fuse_table.meta_location_generator();
265+
let operator = fuse_table.get_operator();
266+
267+
{
268+
let num_of_segments = 3;
269+
let blocks_per_segment = 2;
270+
let segments =
271+
utils::generate_segments(fuse_table, num_of_segments, blocks_per_segment).await?;
272+
273+
// create snapshot 0, the format version is 2.
274+
let locations = vec![segments[0].0.clone()];
275+
let id = Uuid::new_v4();
276+
let mut snapshot_0 = TableSnapshotV2::new(
277+
id,
278+
&None,
279+
None,
280+
schema.as_ref().clone(),
281+
segments[0].1.summary.clone(),
282+
locations,
283+
None,
284+
None,
285+
);
286+
// back-date snapshot 0 to 13 hours before `now`
snapshot_0.timestamp = Some(now - Duration::hours(13));
287+
288+
let new_snapshot_location = location_gen
289+
.snapshot_location_from_uuid(&snapshot_0.snapshot_id, TableSnapshotV2::VERSION)?;
290+
utils::write_snapshot_v2(&operator, &new_snapshot_location, &snapshot_0).await?;
291+
292+
// create snapshot 1, the format version is 3.
293+
let mut locations = Vec::with_capacity(2);
294+
for i in [1, 0] {
295+
locations.push(segments[i].0.clone());
296+
}
297+
let mut snapshot_1 = TableSnapshot::new(
298+
Uuid::new_v4(),
299+
&snapshot_0.timestamp,
300+
Some((snapshot_0.snapshot_id, TableSnapshotV2::VERSION)),
301+
schema.as_ref().clone(),
302+
Statistics::default(),
303+
locations,
304+
None,
305+
None,
306+
);
307+
// back-date snapshot 1 to 12 hours before `now`
snapshot_1.timestamp = Some(now - Duration::hours(12));
308+
// replace the default summary passed to `new` with snapshot 0's summary merged with segment 1's
snapshot_1.summary = merge_statistics(&snapshot_0.summary, &segments[1].1.summary)?;
309+
let new_snapshot_location = location_gen
310+
.snapshot_location_from_uuid(&snapshot_1.snapshot_id, TableSnapshot::VERSION)?;
311+
snapshot_1
312+
.write_meta(&operator, &new_snapshot_location)
313+
.await?;
314+
315+
// create snapshot 2, the format version is 3.
316+
let mut locations = Vec::with_capacity(3);
317+
for i in [2, 1, 0] {
318+
locations.push(segments[i].0.clone());
319+
}
320+
let mut snapshot_2 = TableSnapshot::from_previous(&snapshot_1);
321+
snapshot_2.segments = locations;
322+
snapshot_2.timestamp = Some(now);
323+
snapshot_2.summary = merge_statistics(&snapshot_1.summary, &segments[2].1.summary)?;
324+
let new_snapshot_location = location_gen
325+
.snapshot_location_from_uuid(&snapshot_2.snapshot_id, TableSnapshot::VERSION)?;
326+
snapshot_2
327+
.write_meta(&operator, &new_snapshot_location)
328+
.await?;
329+
// only snapshot 2 goes through `commit_to_meta_server`; snapshots 0 and 1 were just written to storage above
FuseTable::commit_to_meta_server(
330+
ctx.as_ref(),
331+
fuse_table.get_table_info(),
332+
location_gen,
333+
snapshot_2,
334+
None,
335+
&None,
336+
&operator,
337+
)
338+
.await?;
339+
}
340+
341+
let table_ctx: Arc<dyn TableContext> = ctx.clone();
342+
// re-fetch the table after the commit before purging
table = fixture.latest_default_table().await?;
343+
fuse_table = FuseTable::try_from_table(table.as_ref())?;
344+
// NOTE(review): the meaning of the `true` argument is not visible here — confirm against `do_purge`
fuse_table.do_purge(&table_ctx, true).await?;
345+
346+
let expected_num_of_snapshot = 1;
347+
let expected_num_of_segment = 3;
348+
let expected_num_of_blocks = 6;
349+
let expected_num_of_index = expected_num_of_blocks;
350+
check_data_dir(
351+
&fixture,
352+
// NOTE(review): this case name does not mention the older-version scenario; it may have been copied from another test — confirm
"do_gc: retention period is 0",
353+
expected_num_of_snapshot,
354+
0,
355+
expected_num_of_segment,
356+
expected_num_of_blocks,
357+
expected_num_of_index,
358+
Some(()),
359+
None,
360+
)
361+
.await?;
362+
Ok(())
363+
}
364+
246365
mod utils {
366+
use std::io::Error;
247367
use std::sync::Arc;
248368

249369
use chrono::DateTime;
250370
use chrono::Utc;
251371
use common_storages_factory::Table;
252372
use common_storages_fuse::io::MetaWriter;
253373
use common_storages_fuse::FuseStorageFormat;
374+
use opendal::Operator;
254375

255376
use super::*;
256377

257378
pub async fn generate_snapshot_with_segments(
258379
fuse_table: &FuseTable,
259380
segment_locations: Vec<Location>,
260381
time_stamp: Option<DateTime<Utc>>,
382+
version: u64,
261383
) -> Result<String> {
262384
let current_snapshot = fuse_table.read_table_snapshot().await?.unwrap();
263385
let operator = fuse_table.get_operator();
264386
let location_gen = fuse_table.meta_location_generator();
265387
let mut new_snapshot = TableSnapshot::from_previous(current_snapshot.as_ref());
266388
new_snapshot.segments = segment_locations;
267-
let new_snapshot_location = location_gen
268-
.snapshot_location_from_uuid(&new_snapshot.snapshot_id, TableSnapshot::VERSION)?;
389+
let new_snapshot_location =
390+
location_gen.snapshot_location_from_uuid(&new_snapshot.snapshot_id, version)?;
269391
if let Some(ts) = time_stamp {
270392
new_snapshot.timestamp = Some(ts)
271393
}
@@ -320,4 +442,14 @@ mod utils {
320442
let segment_location = segment_writer.write_segment_no_cache(&segment_info).await?;
321443
Ok((segment_location, segment_info))
322444
}
445+
446+
/// Serializes `meta` with `serde_json::to_vec` and writes the resulting bytes to
/// `location` through `data_accessor`.
///
/// A serialization failure is wrapped with `std::io::Error::other`; that error and
/// any error from the write are propagated with `?`.
pub async fn write_snapshot_v2(
447+
data_accessor: &Operator,
448+
location: &str,
449+
meta: &TableSnapshotV2,
450+
) -> Result<()> {
451+
let bs = serde_json::to_vec(&meta).map_err(Error::other)?;
452+
data_accessor.write(location, bs).await?;
453+
Ok(())
454+
}
323455
}

src/query/storages/common/table-meta/src/meta/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ mod compression;
1818
/// Re-exports meta data structures of current version, i.e. v1
1919
mod current;
2020
mod format;
21+
mod utils;
2122
mod v0;
2223
mod v1;
2324
mod v2;
@@ -37,6 +38,7 @@ pub use statistics::Location;
3738
pub use statistics::SnapshotId;
3839
pub use statistics::Statistics;
3940
pub use statistics::StatisticsOfColumns;
41+
pub use utils::*;
4042
pub use versions::testify_version;
4143
pub use versions::SegmentInfoVersion;
4244
pub use versions::SnapshotVersion;
src/query/storages/common/table-meta/src/meta/utils.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// Copyright 2023 Datafuse Labs.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use std::ops::Add;
16+
17+
use chrono::DateTime;
18+
use chrono::Datelike;
19+
use chrono::TimeZone;
20+
use chrono::Timelike;
21+
use chrono::Utc;
22+
23+
/// Returns `ts` with its sub-second part reduced to whole microseconds.
///
/// The value is rebuilt in UTC from the year, month, day, hour, minute and second
/// of `ts`, and the nanosecond field is then set to
/// `ts.timestamp_subsec_micros() * 1_000`.
///
/// # Panics
///
/// Panics if either `unwrap()` fails, i.e. if chrono does not accept the rebuilt
/// date-time or the nanosecond value.
pub fn trim_timestamp_to_micro_second(ts: DateTime<Utc>) -> DateTime<Utc> {
24+
Utc.with_ymd_and_hms(
25+
ts.year(),
26+
ts.month(),
27+
ts.day(),
28+
ts.hour(),
29+
ts.minute(),
30+
ts.second(),
31+
)
32+
.unwrap()
33+
// microseconds converted back to nanoseconds
.with_nanosecond(ts.timestamp_subsec_micros() * 1_000)
34+
.unwrap()
35+
}
36+
37+
/// Picks a timestamp that is not earlier than the previous snapshot's.
///
/// Returns `timestamp` unless `previous_timestamp` is `Some` and strictly later
/// than `timestamp`; in that case returns the previous timestamp plus 1 millisecond.
/// When the two are equal, `timestamp` is returned unchanged.
pub fn monotonically_increased_timestamp(
38+
timestamp: DateTime<Utc>,
39+
previous_timestamp: &Option<DateTime<Utc>>,
40+
) -> DateTime<Utc> {
41+
if let Some(prev_instant) = previous_timestamp {
42+
// the timestamp of a snapshot should always be larger than the previous one's
43+
if prev_instant > &timestamp {
44+
// if the local time is smaller, use the timestamp of the previous snapshot, plus 1 ms
45+
return prev_instant.add(chrono::Duration::milliseconds(1));
46+
}
47+
}
48+
timestamp
49+
}

src/query/storages/common/table-meta/src/meta/v1/snapshot.rs

Lines changed: 4 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
use std::ops::Add;
16-
1715
use chrono::DateTime;
1816
use chrono::Utc;
1917
use common_base::base::uuid::Uuid;
@@ -22,7 +20,9 @@ use serde::Deserialize;
2220
use serde::Serialize;
2321

2422
use super::super::v0::statistics::Statistics;
23+
use crate::meta::monotonically_increased_timestamp;
2524
use crate::meta::statistics::FormatVersion;
25+
use crate::meta::trim_timestamp_to_micro_second;
2626
use crate::meta::ClusterKey;
2727
use crate::meta::Location;
2828
use crate::meta::SnapshotId;
@@ -74,10 +74,10 @@ impl TableSnapshot {
7474
) -> Self {
7575
let now = Utc::now();
7676
// make snapshot timestamp monotonically increased
77-
let adjusted_timestamp = util::monotonically_increased_timestamp(now, prev_timestamp);
77+
let adjusted_timestamp = monotonically_increased_timestamp(now, prev_timestamp);
7878

7979
// trim timestamp to micro seconds
80-
let trimmed_timestamp = util::trim_timestamp_to_micro_second(adjusted_timestamp);
80+
let trimmed_timestamp = trim_timestamp_to_micro_second(adjusted_timestamp);
8181
let timestamp = Some(trimmed_timestamp);
8282

8383
Self {
@@ -163,40 +163,3 @@ impl From<&TableSnapshot> for TableSnapshotLite {
163163
}
164164
}
165165
}
166-
167-
mod util {
168-
use chrono::DateTime;
169-
use chrono::Datelike;
170-
use chrono::TimeZone;
171-
use chrono::Timelike;
172-
use chrono::Utc;
173-
174-
use super::*;
175-
pub fn trim_timestamp_to_micro_second(ts: DateTime<Utc>) -> DateTime<Utc> {
176-
Utc.with_ymd_and_hms(
177-
ts.year(),
178-
ts.month(),
179-
ts.day(),
180-
ts.hour(),
181-
ts.minute(),
182-
ts.second(),
183-
)
184-
.unwrap()
185-
.with_nanosecond(ts.timestamp_subsec_micros() * 1_000)
186-
.unwrap()
187-
}
188-
189-
pub fn monotonically_increased_timestamp(
190-
timestamp: DateTime<Utc>,
191-
previous_timestamp: &Option<DateTime<Utc>>,
192-
) -> DateTime<Utc> {
193-
if let Some(prev_instant) = previous_timestamp {
194-
// timestamp of the snapshot should always larger than the previous one's
195-
if prev_instant > &timestamp {
196-
// if local time is smaller, use the timestamp of previous snapshot, plus 1 ms
197-
return prev_instant.add(chrono::Duration::milliseconds(1));
198-
}
199-
}
200-
timestamp
201-
}
202-
}

0 commit comments

Comments
 (0)