
Commit 0ed47cd

Merge pull request #74 from sched-ext/scx-rustland-multicore-fixes

scx_rustland: multicore fixes

2 parents b008830 + 1ea5aeb

3 files changed: 42 additions, 21 deletions

scheds/rust/scx_rustland/src/bpf.rs
33 additions, 0 deletions

@@ -7,6 +7,8 @@ use crate::bpf_intf;
 use crate::bpf_skel::*;
 
 use std::ffi::CStr;
+use std::fs::File;
+use std::io::{self, BufRead};
 
 use anyhow::Context;
 use anyhow::Result;
@@ -200,6 +202,13 @@ impl<'a> BpfScheduler<'a> {
         let skel_builder = BpfSkelBuilder::default();
         let mut skel = skel_builder.open().context("Failed to open BPF program")?;
 
+        // Initialize online CPUs counter.
+        //
+        // We should probably refresh this counter during normal execution to support CPU
+        // hotplugging, but for now let's keep it simple and set it only at initialization.
+        let nr_cpus_online = Self::count_cpus()?;
+        skel.rodata_mut().num_possible_cpus = nr_cpus_online;
+
         // Set scheduler options (defined in the BPF part).
         skel.bss_mut().usersched_pid = std::process::id();
         skel.rodata_mut().slice_ns = slice_us * 1000;
@@ -226,6 +235,30 @@ impl<'a> BpfScheduler<'a> {
         }
     }
 
+    // Return the number of available CPUs in the system (according to /proc/stat).
+    fn count_cpus() -> io::Result<i32> {
+        let file = File::open("/proc/stat")?;
+        let reader = io::BufReader::new(file);
+        let mut cpu_count = -1;
+
+        for line in reader.lines() {
+            let line = line?;
+            if line.starts_with("cpu") {
+                cpu_count += 1;
+            } else {
+                break;
+            }
+        }
+
+        Ok(cpu_count)
+    }
+
+    // Return the number of CPUs detected at initialization.
+    #[allow(dead_code)]
+    pub fn get_nr_cpus(&self) -> i32 {
+        self.skel.rodata().num_possible_cpus
+    }
+
     // Override the default scheduler time slice (in us).
     #[allow(dead_code)]
     pub fn set_effective_slice_us(&mut self, slice_us: u64) {
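The counting trick relies on the fixed layout of /proc/stat: the first line is the aggregate "cpu" entry, followed by one "cpuN" line per online CPU, after which unrelated entries (intr, ctxt, ...) begin. Starting the counter at -1 cancels out the aggregate line. Below is a minimal sketch of the same loop run against a canned snapshot; count_cpus_from and the sample values are illustrative, not part of the commit:

use std::io::{BufRead, Cursor};

// Same counting loop as count_cpus() above, but reading from an in-memory
// snapshot instead of /proc/stat, so it can run anywhere.
fn count_cpus_from(reader: impl BufRead) -> std::io::Result<i32> {
    let mut cpu_count = -1; // the first "cpu" line is the aggregate, not a CPU
    for line in reader.lines() {
        let line = line?;
        if line.starts_with("cpu") {
            cpu_count += 1;
        } else {
            break; // per-CPU lines are contiguous at the top of the file
        }
    }
    Ok(cpu_count)
}

fn main() -> std::io::Result<()> {
    // Hypothetical 4-CPU snapshot; the field values are irrelevant to the count.
    let stat = "cpu  100 0 50 900\n\
                cpu0 25 0 12 225\n\
                cpu1 25 0 13 225\n\
                cpu2 25 0 12 225\n\
                cpu3 25 0 13 225\n\
                intr 123456\n";
    assert_eq!(count_cpus_from(Cursor::new(stat))?, 4);
    Ok(())
}

The starts_with("cpu") check is deliberately loose, but /proc/stat keeps all per-CPU lines contiguous at the top, so breaking at the first non-matching line is safe.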

scheds/rust/scx_rustland/src/bpf/main.bpf.c
2 additions, 2 deletions

@@ -44,7 +44,7 @@ char _license[] SEC("license") = "GPL";
 #define MAX_CPUS 1024
 
 /* !0 for veristat, set during init */
-const volatile u32 num_possible_cpus = 8;
+const volatile s32 num_possible_cpus = 8;
 
 /*
  * Exit info (passed to the user-space counterpart).
@@ -323,7 +323,7 @@ static s32 get_task_cpu(struct task_struct *p, s32 cpu)
 	 * Return -ENOENT if no CPU is available.
 	 */
 	cpu = bpf_cpumask_any_distribute(p->cpus_ptr);
-	return cpu < num_possible_cpus ? : -ENOENT;
+	return cpu < num_possible_cpus ? cpu : -ENOENT;
 }
 
 /*

The first hunk makes num_possible_cpus signed, matching both the s32 CPU ids it is compared against and the i32 value now written from user space. The second hunk fixes a subtle bug: with GCC's two-operand ?: extension, "cond ? : x" evaluates to the value of cond itself when it is truthy, so the old code returned 1 (the result of the comparison) instead of the selected CPU. Spelling out the middle operand makes the function return the CPU id as intended.

scheds/rust/scx_rustland/src/main.rs
7 additions, 19 deletions

@@ -164,7 +164,6 @@ struct Scheduler<'a> {
     task_pool: TaskTree,   // tasks ordered by vruntime
     task_map: TaskInfoMap, // map pids to the corresponding task information
     min_vruntime: u64,     // Keep track of the minimum vruntime across all tasks
-    nr_cpus_online: i32,   // Amount of the available CPUs in the system
     slice_ns: u64,         // Default time slice (in ns)
 }
 
@@ -186,19 +185,12 @@ impl<'a> Scheduler<'a> {
         // Initialize global minimum vruntime.
         let min_vruntime: u64 = 0;
 
-        // Initialize online CPUs counter.
-        //
-        // We should probably refresh this counter during the normal execution to support cpu
-        // hotplugging, but for now let's keep it simple and set this only at initialization).
-        let nr_cpus_online = libbpf_rs::num_possible_cpus().unwrap() as i32;
-
         // Return scheduler object.
         Ok(Self {
             bpf,
             task_pool,
             task_map,
             min_vruntime,
-            nr_cpus_online,
             slice_ns,
         })
     }

The counter now lives in BpfScheduler and is read from /proc/stat, which lists the CPUs actually online, rather than being taken from libbpf_rs::num_possible_cpus(), which reports possible CPUs and can exceed the online count.
@@ -207,7 +199,7 @@ impl<'a> Scheduler<'a> {
     fn get_idle_cpus(&self) -> Vec<i32> {
         let mut idle_cpus = Vec::new();
 
-        for cpu in 0..self.nr_cpus_online {
+        for cpu in 0..self.bpf.get_nr_cpus() {
             let pid = self.bpf.get_cpu_pid(cpu);
             if pid == 0 {
                 idle_cpus.push(cpu);
@@ -228,10 +220,6 @@ impl<'a> Scheduler<'a> {
         min_vruntime: u64,
         slice_ns: u64,
     ) {
-        // Allow to scale the maximum time slice by a factor of 10 to increase the range of allowed
-        // time delta and give a better chance to prioritize tasks with higher weight.
-        let max_slice_ns = slice_ns * 10;
-
         // Evaluate last time slot used by the task, scaled by its priority (weight).
         //
         // NOTE: make sure to handle the case where the current sum_exec_runtime is less then the
@@ -251,15 +239,15 @@ impl<'a> Scheduler<'a> {
 
         // Make sure that the updated vruntime is in the range:
         //
-        // (min_vruntime, min_vruntime + max_slice_ns]
+        // (min_vruntime, min_vruntime + slice_ns]
         //
         // In this way we ensure that global vruntime is always progressing during each scheduler
         // run, preventing excessive starvation of the other tasks sitting in the self.task_pool
         // tree.
         //
-        // Moreover, limiting the accounted time slice to max_slice_ns, allows to prevent starving
-        // the current task for too long in the scheduler task pool.
-        task_info.vruntime = min_vruntime + slice.clamp(1, max_slice_ns);
+        // Moreover, limiting the accounted time slice to slice_ns prevents the current task from
+        // being starved for too long in the scheduler task pool.
+        task_info.vruntime = min_vruntime + slice.clamp(1, slice_ns);
 
         // Update total task cputime.
         task_info.sum_exec_runtime = sum_exec_runtime;
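To see what the tighter clamp does, here is a standalone sketch of the update with made-up numbers; update_vruntime is an illustrative helper, not a function in this patch:

// Minimal sketch of the vruntime update above, with hypothetical values.
fn update_vruntime(min_vruntime: u64, slice: u64, slice_ns: u64) -> u64 {
    // Clamp the accounted slice into [1, slice_ns] so vruntime always
    // advances, but never jumps more than one full slice ahead.
    min_vruntime + slice.clamp(1, slice_ns)
}

fn main() {
    let slice_ns = 5_000_000; // 5 ms default slice (made-up value)
    // A task that ran for 40 ms is only charged one slice (5 ms)...
    assert_eq!(update_vruntime(100, 40_000_000, slice_ns), 100 + 5_000_000);
    // ...and a task that ran for ~0 ns is still charged at least 1 ns, keeping
    // the result strictly inside (min_vruntime, min_vruntime + slice_ns].
    assert_eq!(update_vruntime(100, 0, slice_ns), 101);
}

With the removed factor of 10, a long-running task could be pushed up to ten slices ahead of min_vruntime; clamping to a single slice keeps waiting tasks closer together in the vruntime tree.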
@@ -327,7 +315,7 @@ impl<'a> Scheduler<'a> {
         let nr_queued = *self.bpf.nr_queued_mut();
         let nr_scheduled = *self.bpf.nr_scheduled_mut();
         let nr_waiting = nr_queued + nr_scheduled;
-        let nr_cpus = self.nr_cpus_online as u64;
+        let nr_cpus = self.bpf.get_nr_cpus() as u64;
 
         // Scale time slice, but never scale below 1 ms.
         let scaling = nr_waiting / nr_cpus + 1;
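Only the scaling factor is visible in this hunk; the division that applies it sits below the context shown. The sketch that follows reconstructs the idea under that assumption, so effective_slice_ns and the numbers are hypothetical:

// Sketch of the slice scaling: the busier the system (waiting tasks per CPU),
// the shorter the effective time slice, with a 1 ms floor.
fn effective_slice_ns(slice_ns: u64, nr_waiting: u64, nr_cpus: u64) -> u64 {
    const NSEC_PER_MSEC: u64 = 1_000_000;
    let scaling = nr_waiting / nr_cpus + 1;
    // Never scale below 1 ms, per the comment in the hunk above (assumed to
    // be enforced right after the visible context).
    (slice_ns / scaling).max(NSEC_PER_MSEC)
}

fn main() {
    let slice_ns = 5_000_000; // hypothetical 5 ms default
    assert_eq!(effective_slice_ns(slice_ns, 0, 8), 5_000_000);  // idle: full slice
    assert_eq!(effective_slice_ns(slice_ns, 24, 8), 1_250_000); // busy: 5 ms / 4
}

Using get_nr_cpus() here matters because dividing by the (possibly larger) possible-CPU count would understate the per-CPU load and scale the slice too little.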
@@ -475,7 +463,7 @@ impl<'a> Scheduler<'a> {
             Err(_) => -1,
         };
         info!("Running tasks:");
-        for cpu in 0..self.nr_cpus_online {
+        for cpu in 0..self.bpf.get_nr_cpus() {
             let pid = if cpu == sched_cpu {
                 "[self]".to_string()
             } else {
