Skip to content

Commit 7b3994b

Browse files
committed
uniq: add benchmarks
1 parent 52c71dc commit 7b3994b

File tree

3 files changed

+120
-0
lines changed

3 files changed

+120
-0
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uu/uniq/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ clap = { workspace = true }
2222
uucore = { workspace = true, features = ["parser"] }
2323
fluent = { workspace = true }
2424

25+
[dev-dependencies]
26+
divan = { workspace = true }
27+
tempfile = { workspace = true }
28+
uucore = { workspace = true, features = ["benchmark", "parser"] }
29+
2530
[[bin]]
2631
name = "uniq"
2732
path = "src/main.rs"

src/uu/uniq/benches/uniq_bench.rs

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
// This file is part of the uutils coreutils package.
2+
//
3+
// For the full copyright and license information, please view the LICENSE
4+
// file that was distributed with this source code.
5+
6+
use divan::{Bencher, black_box};
7+
use uu_uniq::uumain;
8+
use uucore::benchmark::{run_util_function, setup_test_file};
9+
10+
/// Generate data with many consecutive duplicate lines.
///
/// Emits `num_groups` distinct lines, each repeated `duplicates_per_group`
/// times back to back, and returns the concatenated bytes. Long runs of equal
/// neighbouring lines are exactly what PR #8703 optimizes (avoiding
/// allocations when comparing lines).
///
/// Returns an empty vector when either argument is zero.
fn generate_duplicate_heavy_data(num_groups: usize, duplicates_per_group: usize) -> Vec<u8> {
    let mut data = Vec::new();

    for group in 0..num_groups {
        // Generate a line with realistic content
        let line = format!(
            "Line content for group {group:06} with additional text to make it more realistic for testing performance\n"
        );

        if group == 0 {
            // Lines only get longer once `group` needs more than six digits, so
            // the first line's length gives a lower bound on the total size.
            // Reserving it once avoids repeatedly growing and copying a buffer
            // that reaches hundreds of megabytes for the benchmark inputs.
            let expected_len = line
                .len()
                .checked_mul(duplicates_per_group)
                .and_then(|bytes| bytes.checked_mul(num_groups));
            if let Some(expected_len) = expected_len {
                data.reserve(expected_len);
            }
        }

        // Repeat the line multiple times (this is what PR #8703 optimizes)
        for _ in 0..duplicates_per_group {
            data.extend_from_slice(line.as_bytes());
        }
    }

    data
}
29+
30+
/// Benchmark 1: Heavy duplicates - the main optimization target
31+
/// Many consecutive duplicate lines that stress the line comparison optimization
32+
#[divan::bench(args = [10_000_000])]
33+
fn uniq_heavy_duplicates(bencher: Bencher, num_lines: usize) {
34+
// Create 1000 groups with ~10,000 duplicates each
35+
// This maximizes the benefit of PR #8703's optimization
36+
let num_groups = 1000;
37+
let duplicates_per_group = num_lines / num_groups;
38+
let data = generate_duplicate_heavy_data(num_groups, duplicates_per_group);
39+
let file_path = setup_test_file(&data);
40+
let file_path_str = file_path.to_str().unwrap();
41+
42+
bencher.bench(|| {
43+
black_box(run_util_function(uumain, &[file_path_str]));
44+
});
45+
}
46+
47+
/// Benchmark 2: Mixed duplicates with counting
48+
/// Tests the -c flag with a mix of duplicate groups
49+
#[divan::bench(args = [5_000_000])]
50+
fn uniq_with_count(bencher: Bencher, num_lines: usize) {
51+
// Create more groups with fewer duplicates for varied counting
52+
let num_groups = num_lines / 100;
53+
let data = generate_duplicate_heavy_data(num_groups, 100);
54+
let file_path = setup_test_file(&data);
55+
let file_path_str = file_path.to_str().unwrap();
56+
57+
bencher.bench(|| {
58+
black_box(run_util_function(uumain, &["-c", file_path_str]));
59+
});
60+
}
61+
62+
/// Benchmark 3: Case-insensitive comparison with duplicates
63+
/// Tests the -i flag which requires case folding during comparison
64+
#[divan::bench(args = [2_000_000])]
65+
fn uniq_case_insensitive(bencher: Bencher, num_lines: usize) {
66+
let mut data = Vec::new();
67+
let words = [
68+
"Hello",
69+
"WORLD",
70+
"Testing",
71+
"UNIQ",
72+
"Benchmark",
73+
"Performance",
74+
];
75+
76+
// Generate groups of case variations
77+
for i in 0..num_lines {
78+
let word = words[(i / 50) % words.len()];
79+
80+
// Create case variations that should be treated as duplicates with -i
81+
let variation = match i % 4 {
82+
0 => word.to_lowercase(),
83+
1 => word.to_uppercase(),
84+
2 => word.to_string(),
85+
_ => {
86+
// Mixed case
87+
word.chars()
88+
.enumerate()
89+
.map(|(idx, c)| {
90+
if idx % 2 == 0 {
91+
c.to_lowercase().to_string()
92+
} else {
93+
c.to_uppercase().to_string()
94+
}
95+
})
96+
.collect()
97+
}
98+
};
99+
100+
data.extend_from_slice(format!("{variation}\n").as_bytes());
101+
}
102+
103+
let file_path = setup_test_file(&data);
104+
let file_path_str = file_path.to_str().unwrap();
105+
106+
bencher.bench(|| {
107+
black_box(run_util_function(uumain, &["-i", file_path_str]));
108+
});
109+
}
110+
111+
fn main() {
112+
divan::main();
113+
}

0 commit comments

Comments
 (0)