Skip to content

Commit d73671a

Browse files
committed
Introduce io_uring dump/restore support
Signed-off-by: Kumar Kartikeya Dwivedi <[email protected]>
1 parent 5f539fc commit d73671a

File tree

20 files changed

+1652
-35
lines changed

20 files changed

+1652
-35
lines changed

criu/Makefile.crtools

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ obj-y += fsnotify.o
3131
obj-y += image-desc.o
3232
obj-y += image.o
3333
obj-y += img-streamer.o
34+
obj-y += io_uring.o
3435
obj-y += ipc_ns.o
3536
obj-y += irmap.o
3637
obj-y += kcmp-ids.o

criu/cr-dump.c

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
#include "pidfd-store.h"
8686
#include "apparmor.h"
8787
#include "asm/dump.h"
88+
#include "io_uring.h"
8889

8990
/*
9091
* Architectures can overwrite this function to restore register sets that
@@ -191,10 +192,11 @@ struct cr_imgset *glob_imgset;
191192

192193
static int collect_fds(pid_t pid, struct parasite_drain_fd **dfds)
193194
{
195+
char buf[PATH_MAX] = {};
194196
struct dirent *de;
195-
DIR *fd_dir;
197+
int n, pidfd = -1;
196198
int size = 0;
197-
int n;
199+
DIR *fd_dir;
198200

199201
pr_info("\n");
200202
pr_info("Collecting fds (pid: %d)\n", pid);
@@ -204,6 +206,59 @@ static int collect_fds(pid_t pid, struct parasite_drain_fd **dfds)
204206
if (!fd_dir)
205207
return -1;
206208

209+
/* Before collecting fds, we need to bring io_uring to steady state,
210+
* since it can install fds into the task's fdtable, and if we do it later,
211+
* during the actual io_uring dump, we will miss dumping these files.
212+
*/
213+
while ((de = readdir(fd_dir))) {
214+
if (dir_dots(de))
215+
continue;
216+
217+
n = dirfd(fd_dir);
218+
if (n == -1) {
219+
close(pidfd);
220+
return -1;
221+
}
222+
223+
n = readlinkat(n, de->d_name, buf, sizeof(buf));
224+
if (n == -1) {
225+
close(pidfd);
226+
return -1;
227+
}
228+
229+
if (is_io_uring_link(buf)) {
230+
if (!kdat.has_pidfd_open) {
231+
pr_err("pidfd_open system call not supported\n");
232+
return -ENOTSUP;
233+
}
234+
235+
if (!kdat.has_pidfd_getfd) {
236+
pr_err("pidfd_getfd system call not supported\n");
237+
return -ENOTSUP;
238+
}
239+
240+
if (pidfd == -1) {
241+
pidfd = syscall(SYS_pidfd_open, pid, 0);
242+
if (pidfd < 0) {
243+
pr_err("Failed to open pidfd for pid %d\n", pid);
244+
return pidfd;
245+
}
246+
}
247+
248+
if (io_uring_synchronize_fd(syscall(__NR_pidfd_getfd, pidfd, atoi(de->d_name), 0))) {
249+
pr_err("Failed to synchronize io_uring fd %d for pid %d\n", atoi(de->d_name), pid);
250+
close(pidfd);
251+
return -1;
252+
}
253+
}
254+
}
255+
256+
if (pidfd >= 0)
257+
close(pidfd);
258+
259+
/* Collect fds now */
260+
rewinddir(fd_dir);
261+
207262
n = 0;
208263
while ((de = readdir(fd_dir))) {
209264
if (dir_dots(de))
@@ -489,6 +544,8 @@ static int dump_task_mm(pid_t pid, const struct proc_pid_stat *stat, const struc
489544
ret = check_sysvipc_map_dump(pid, vma);
490545
else if (vma_entry_is(vma, VMA_AREA_SOCKET))
491546
ret = dump_socket_map(vma_area);
547+
else if (vma_entry_is(vma, VMA_AREA_IO_URING))
548+
ret = dump_io_uring_map(vma_area);
492549
else
493550
ret = 0;
494551
if (ret)

criu/cr-restore.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "servicefd.h"
3030
#include "image.h"
3131
#include "img-streamer.h"
32+
#include "io_uring.h"
3233
#include "util.h"
3334
#include "util-pie.h"
3435
#include "criu-log.h"
@@ -277,7 +278,7 @@ static struct collect_image_info *cinfos_files[] = {
277278
&unix_sk_cinfo, &fifo_cinfo, &pipe_cinfo, &nsfile_cinfo, &packet_sk_cinfo,
278279
&netlink_sk_cinfo, &eventfd_cinfo, &epoll_cinfo, &epoll_tfd_cinfo, &signalfd_cinfo,
279280
&tunfile_cinfo, &timerfd_cinfo, &inotify_cinfo, &inotify_mark_cinfo, &fanotify_cinfo,
280-
&fanotify_mark_cinfo, &ext_file_cinfo, &memfd_cinfo,
281+
&fanotify_mark_cinfo, &ext_file_cinfo, &memfd_cinfo, &io_uring_cinfo,
281282
};
282283

283284
/* These images are required to restore namespaces */

criu/files.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#include "kerndat.h"
5050
#include "fdstore.h"
5151
#include "bpfmap.h"
52+
#include "io_uring.h"
5253

5354
#include "protobuf.h"
5455
#include "util.h"
@@ -536,6 +537,8 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts,
536537
else if (is_bpfmap_link(link))
537538
ops = &bpfmap_dump_ops;
538539
#endif
540+
else if (is_io_uring_link(link))
541+
ops = &io_uring_dump_ops;
539542
else
540543
return dump_unsupp_fd(&p, lfd, "anon", link, e);
541544

criu/image-desc.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
107107
FD_ENTRY_F(BPFMAP_FILE, "bpfmap-file", O_NOBUF),
108108
FD_ENTRY_F(BPFMAP_DATA, "bpfmap-data", O_NOBUF),
109109
FD_ENTRY(APPARMOR, "apparmor"),
110+
FD_ENTRY_F(IO_URING_FILE, "io_uring-file", O_NOBUF),
111+
FD_ENTRY_F(IO_URING_DATA, "io_uring-data", O_NOBUF),
110112

111113
[CR_FD_STATS] = {
112114
.fmt = "stats-%s",

criu/include/image-desc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ enum {
7171
CR_FD_MEMFD_INODE,
7272
CR_FD_BPFMAP_FILE,
7373
CR_FD_BPFMAP_DATA,
74+
CR_FD_IO_URING_FILE,
75+
CR_FD_IO_URING_DATA,
7476
_CR_FD_GLOB_TO,
7577

7678
CR_FD_TMPFS_IMG,

criu/include/image.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,12 @@
7979
#define VMA_ANON_SHARED (1 << 8)
8080
#define VMA_ANON_PRIVATE (1 << 9)
8181

82-
#define VMA_AREA_SYSVIPC (1 << 10)
83-
#define VMA_AREA_SOCKET (1 << 11)
84-
#define VMA_AREA_VVAR (1 << 12)
85-
#define VMA_AREA_AIORING (1 << 13)
86-
#define VMA_AREA_MEMFD (1 << 14)
82+
#define VMA_AREA_SYSVIPC (1 << 10)
83+
#define VMA_AREA_SOCKET (1 << 11)
84+
#define VMA_AREA_VVAR (1 << 12)
85+
#define VMA_AREA_AIORING (1 << 13)
86+
#define VMA_AREA_MEMFD (1 << 14)
87+
#define VMA_AREA_IO_URING (1 << 15)
8788

8889
#define VMA_CLOSE (1 << 28)
8990
#define VMA_NO_PROT_WRITE (1 << 29)

criu/include/io_uring.h

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#ifndef __CR_IO_URING_H__
2+
#define __CR_IO_URING_H__
3+
4+
#include <linux/capability.h>
5+
6+
#include "files.h"
7+
#include "io_uring.pb-c.h"
8+
9+
/* Definitions */
10+
/*
 * A single io_uring restriction record: a 16-bit opcode, one byte of
 * opcode-specific payload (the anonymous union), then reserved fields.
 * NOTE(review): the name and layout look like a private copy of the kernel
 * UAPI struct io_uring_restriction -- confirm against linux/io_uring.h.
 */
struct __io_uring_restriction {
11+
__u16 opcode; /* presumably one of the __IORING_RESTRICTION_* values defined in this header -- TODO confirm */
12+
union {
13+
__u8 register_op; /* IORING_RESTRICTION_REGISTER_OP */
14+
__u8 sqe_op; /* IORING_RESTRICTION_SQE_OP */
15+
__u8 sqe_flags; /* IORING_RESTRICTION_SQE_FLAGS_* */
16+
};
17+
__u8 resv; /* reserved, judging by the name */
18+
__u32 resv2[3]; /* reserved, judging by the name */
19+
};
20+
21+
/*
 * Fallback definitions: each constant below is only defined when the
 * headers already included have not provided it. Setup flag bit 4 is not
 * defined in this list.
 */
#ifndef IORING_SETUP_IOPOLL
22+
#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */
23+
#endif
24+
#ifndef IORING_SETUP_SQPOLL
25+
#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */
26+
#endif
27+
#ifndef IORING_SETUP_SQ_AFF
28+
#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */
29+
#endif
30+
#ifndef IORING_SETUP_CQSIZE
31+
#define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */
32+
#endif
33+
#ifndef IORING_SETUP_ATTACH_WQ
34+
#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */
35+
#endif
36+
#ifndef IORING_SETUP_R_DISABLED
37+
#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */
38+
#endif
39+
40+
/*
 * NOTE(review): presumably the mmap offsets that select the SQ ring, the
 * CQ ring and the SQE array of an io_uring fd -- confirm against the
 * kernel UAPI header.
 */
#ifndef IORING_OFF_SQ_RING
41+
#define IORING_OFF_SQ_RING 0ULL
42+
#endif
43+
#ifndef IORING_OFF_CQ_RING
44+
#define IORING_OFF_CQ_RING 0x8000000ULL
45+
#endif
46+
#ifndef IORING_OFF_SQES
47+
#define IORING_OFF_SQES 0x10000000ULL
48+
#endif
49+
50+
/* Fallback for the IOSQE_IO_DRAIN SQE flag (bit 1). */
#ifndef IOSQE_IO_DRAIN
51+
#define IOSQE_IO_DRAIN (1U << 1)
52+
#endif
53+
54+
/*
 * Privately-named opcode constants, defined unconditionally (no #ifndef
 * guard, unlike the IORING_* fallbacks in this header).
 * NOTE(review): values presumably match the kernel's IORING_RESTRICTION_*
 * and IORING_REGISTER_* enumerators -- confirm against linux/io_uring.h.
 */
#define __IORING_RESTRICTION_REGISTER_OP 0
55+
#define __IORING_RESTRICTION_SQE_OP 1
56+
#define __IORING_RESTRICTION_SQE_FLAGS_ALLOWED 2
57+
#define __IORING_RESTRICTION_SQE_FLAGS_REQUIRED 3
58+
#define __IORING_REGISTER_PERSONALITY 9
59+
#define __IORING_REGISTER_RESTRICTIONS 11
60+
#define __IORING_REGISTER_ENABLE_RINGS 12
61+
62+
/*
 * Pairs an IoUringFileEntry image entry with a generic file descriptor
 * object.
 * NOTE(review): presumably the per-file record built when io_uring file
 * images are collected on restore -- confirm in io_uring.c.
 */
struct io_uring_file_info {
63+
IoUringFileEntry *iofe; /* protobuf entry for this io_uring file */
64+
struct file_desc d; /* embedded file_desc (declared in files.h) */
65+
};
66+
67+
/*
 * Thin wrapper holding a pointer to an IoUringDataEntry image entry.
 */
struct io_uring_data_info {
68+
IoUringDataEntry *iode; /* protobuf entry for io_uring data */
69+
};
70+
71+
/*
 * One group entry: a gid together with a fixed-size name buffer, linkable
 * into a list through the list member.
 * NOTE(review): presumably chained on io_uring_personality_desc.group_list
 * -- confirm in io_uring.c.
 */
struct io_uring_group_desc {
72+
struct list_head list; /* list linkage */
73+
gid_t group; /* numeric group id */
74+
char group_name[32]; /* group name; buffer is 32 bytes, so longer names cannot fit */
75+
};
76+
77+
/*
 * Credential set tied to an integer id: four uids, four gids, an
 * effective-capability word array and a group list.
 * NOTE(review): presumably describes one registered io_uring personality
 * (see __IORING_REGISTER_PERSONALITY) -- confirm in io_uring.c.
 */
struct io_uring_personality_desc {
78+
int id; /* personality identifier */
79+
uid_t uid;
80+
uid_t euid;
81+
uid_t suid;
82+
uid_t fsuid;
83+
gid_t gid;
84+
gid_t egid;
85+
gid_t sgid;
86+
gid_t fsgid;
87+
u32 cap_eff[CR_CAP_SIZE]; /* capability words; CR_CAP_SIZE is defined outside this header */
88+
size_t nr_groups; /* presumably the number of entries on group_list -- TODO confirm */
89+
struct list_head group_list; /* list head; presumably of struct io_uring_group_desc -- TODO confirm */
90+
};
91+
92+
struct io_uring_ctx;
93+
94+
extern struct collect_image_info io_uring_cinfo;
95+
extern struct collect_image_info io_uring_data_cinfo;
96+
extern const struct fdtype_ops io_uring_dump_ops;
97+
98+
int is_io_uring_link(char *link);
99+
int io_uring_synchronize_fd(int fd);
100+
int collect_io_uring_map(struct vma_area *vma);
101+
int dump_io_uring_map(struct vma_area *vma);
102+
int add_one_io_uring_mapping(uint64_t offset, ino_t inode);
103+
104+
int io_uring_push_buf(struct io_uring_ctx *ctx, unsigned int idx, long long unsigned int address, unsigned int len);
105+
int io_uring_push_personality(struct io_uring_ctx *ctx, struct io_uring_personality_desc *desc);
106+
IoUringFileEntry *io_uring_get_iofe(struct io_uring_ctx *ctx);
107+
108+
#endif /* __CR_IO_URING_H__ */

criu/include/magic.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@
100100
#define BPFMAP_FILE_MAGIC 0x57506142 /* Alapayevsk */
101101
#define BPFMAP_DATA_MAGIC 0x64324033 /* Arkhangelsk */
102102
#define APPARMOR_MAGIC 0x59423047 /* Nikolskoye */
103+
#define IO_URING_FILE_MAGIC 0x55403656 /* Butyn */
104+
#define IO_URING_DATA_MAGIC 0x54194822 /* Ulyanovsk */
103105

104106
#define IFADDR_MAGIC RAW_IMAGE_MAGIC
105107
#define ROUTE_MAGIC RAW_IMAGE_MAGIC

criu/include/protobuf-desc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ enum {
7070
PB_BPFMAP_FILE,
7171
PB_BPFMAP_DATA,
7272
PB_APPARMOR,
73+
PB_IO_URING_FILE,
74+
PB_IO_URING_DATA,
7375

7476
/* PB_AUTOGEN_STOP */
7577

0 commit comments

Comments
 (0)