From a2f13795f3889ecf5c0e379c9f9d7585da4ef640 Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Wed, 23 Jul 2025 09:06:50 +0800 Subject: [PATCH] libbpf-tools: tcpstates: Fix CLOSE to SYN_SENT wrong delta When the TCP socket is initially created, the timestamp of when sk is created is not recorded, so the delay from the CLOSE state to any state is 0, which is obviously wrong. This patch records the time when sk is created as the start time of the CLOSE state, thereby obtaining the time from CLOSE to LISTEN or SYN_SENT. At the same time, because some time differences are really too small, nanosecond level time display support is added (-n). Before: bcc/libbpf-tools$ sudo ./tcpstates | grep CLOSE ffff8b296bd1af80 421523 Chrome_Chi 10.56.52.9 0 10.32.0.200 8080 CLOSE -> SYN_SENT 0.000 ffff8b296bd1df00 421523 Chrome_Chi 10.56.52.9 0 10.32.0.200 8080 CLOSE -> SYN_SENT 0.000 ffff8b29c851a600 421523 Chrome_Chi 10.56.52.9 0 10.32.0.200 8080 CLOSE -> SYN_SENT 0.000 ffff8b29c4ac4c00 421523 Chrome_Chi 10.56.52.9 0 10.32.0.200 8080 CLOSE -> SYN_SENT 0.000 ffff8b29c4ac6880 421523 Chrome_Chi 10.56.52.9 0 10.32.0.200 8080 CLOSE -> SYN_SENT 0.000 ffff8b29c4ac4280 421523 Chrome_Chi 10.56.52.9 0 10.32.0.200 8080 CLOSE -> SYN_SENT 0.000 ffff8b29c4ac5580 421523 Chrome_Chi 10.56.52.9 0 10.32.0.200 8080 CLOSE -> SYN_SENT 0.000 ^^^^^ After: bcc/libbpf-tools$ sudo ./tcpstates | grep CLOSE ffff8b29f6c91300 421523 Chrome_Chi 10.56.52.9 0 10.32.0.200 8080 CLOSE -> SYN_SENT 0.020 ffff8b29f6c94280 421523 Chrome_Chi 10.56.52.9 0 10.32.0.200 8080 CLOSE -> SYN_SENT 0.011 ffff8b2a1d45cc00 421523 Chrome_Chi 10.56.52.9 0 10.32.0.200 8080 CLOSE -> SYN_SENT 0.022 ffff8b2a1d45af80 421523 Chrome_Chi 10.56.52.9 0 10.32.0.200 8080 CLOSE -> SYN_SENT 0.027 ffff8b29c8518980 421523 Chrome_Chi 10.56.52.9 0 10.32.0.200 8080 CLOSE -> SYN_SENT 0.023 Signed-off-by: Rong Tao --- libbpf-tools/tcpstates.bpf.c | 27 +++++++++++++++++++++++---- libbpf-tools/tcpstates.c | 17 +++++++++++++---- libbpf-tools/tcpstates.h | 2 +- 3 
files changed, 37 insertions(+), 9 deletions(-) diff --git a/libbpf-tools/tcpstates.bpf.c b/libbpf-tools/tcpstates.bpf.c index 0f9ed2414a08..b5703805b452 100644 --- a/libbpf-tools/tcpstates.bpf.c +++ b/libbpf-tools/tcpstates.bpf.c @@ -41,6 +41,25 @@ struct { __uint(value_size, sizeof(__u32)); } events SEC(".maps"); +static inline int tcp_sock_create(struct sock *sk) +{ + __u64 ts = bpf_ktime_get_ns(); + bpf_map_update_elem(&timestamps, &sk, &ts, BPF_ANY); + return 0; +} + +SEC("kprobe/tcp_v4_init_sock") +int BPF_KPROBE(tcp_v4_init_sock, struct sock *sk) +{ + return tcp_sock_create(sk); +} + +SEC("kprobe/tcp_v6_init_sock") +int BPF_KPROBE(tcp_v6_init_sock, struct sock *sk) +{ + return tcp_sock_create(sk); +} + SEC("tracepoint/sock/inet_sock_set_state") int handle_set_state(struct trace_event_raw_inet_sock_set_state *ctx) { @@ -48,7 +67,7 @@ int handle_set_state(struct trace_event_raw_inet_sock_set_state *ctx) __u16 family = ctx->family; __u16 sport = ctx->sport; __u16 dport = ctx->dport; - __u64 *tsp, delta_us, ts; + __u64 *tsp, delta_ns, ts; struct event event = {}; if (ctx->protocol != IPPROTO_TCP) @@ -66,13 +85,13 @@ int handle_set_state(struct trace_event_raw_inet_sock_set_state *ctx) tsp = bpf_map_lookup_elem(&timestamps, &sk); ts = bpf_ktime_get_ns(); if (!tsp) - delta_us = 0; + delta_ns = 0; else - delta_us = (ts - *tsp) / 1000; + delta_ns = (ts - *tsp); event.skaddr = (__u64)sk; event.ts_us = ts / 1000; - event.delta_us = delta_us; + event.delta_ns = delta_ns; event.pid = bpf_get_current_pid_tgid() >> 32; event.oldstate = ctx->oldstate; event.newstate = ctx->newstate; diff --git a/libbpf-tools/tcpstates.c b/libbpf-tools/tcpstates.c index a0e30936e268..1cf99e13e23d 100644 --- a/libbpf-tools/tcpstates.c +++ b/libbpf-tools/tcpstates.c @@ -33,6 +33,7 @@ static short target_family = 0; static char *target_sports = NULL; static char *target_dports = NULL; static bool wide_output = false; +static bool nanoseconds = false; static bool verbose = false; static const char 
*tcp_states[] = { [1] = "ESTABLISHED", @@ -70,6 +71,7 @@ static const struct argp_option opts[] = { { "ipv4", '4', NULL, 0, "Trace IPv4 family only", 0 }, { "ipv6", '6', NULL, 0, "Trace IPv6 family only", 0 }, { "wide", 'w', NULL, 0, "Wide column output (fits IPv6 addresses)", 0 }, + { "nanoseconds", 'n', NULL, 0, "Display nanosecond delay", 0 }, { "localport", 'L', "LPORT", 0, "Comma-separated list of local ports to trace.", 0 }, { "remoteport", 'D', "DPORT", 0, "Comma-separated list of remote ports to trace.", 0 }, { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help", 0 }, @@ -97,6 +99,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'w': wide_output = true; break; + case 'n': + nanoseconds = true; + break; case 'L': if (!arg) { warn("No ports specified\n"); @@ -175,11 +180,13 @@ static void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) family = e.family == AF_INET ? 4 : 6; printf("%-16llx %-7d %-16s %-2d %-39s %-5d %-39s %-5d %-11s -> %-11s %.3f\n", e.skaddr, e.pid, e.task, family, saddr, e.sport, daddr, e.dport, - tcp_states[e.oldstate], tcp_states[e.newstate], (double)e.delta_us / 1000); + tcp_states[e.oldstate], tcp_states[e.newstate], + (double)e.delta_ns / (nanoseconds ? 1 : 1000000)); } else { printf("%-16llx %-7d %-10.10s %-15s %-5d %-15s %-5d %-11s -> %-11s %.3f\n", e.skaddr, e.pid, e.task, saddr, e.sport, daddr, e.dport, - tcp_states[e.oldstate], tcp_states[e.newstate], (double)e.delta_us / 1000); + tcp_states[e.oldstate], tcp_states[e.newstate], + (double)e.delta_ns / (nanoseconds ? 1 : 1000000)); } } @@ -274,11 +281,13 @@ int main(int argc, char **argv) if (wide_output) printf("%-16s %-7s %-16s %-2s %-39s %-5s %-39s %-5s %-11s -> %-11s %s\n", "SKADDR", "PID", "COMM", "IP", "LADDR", "LPORT", - "RADDR", "RPORT", "OLDSTATE", "NEWSTATE", "MS"); + "RADDR", "RPORT", "OLDSTATE", "NEWSTATE", + nanoseconds ? 
"NS" : "MS"); else printf("%-16s %-7s %-10s %-15s %-5s %-15s %-5s %-11s -> %-11s %s\n", "SKADDR", "PID", "COMM", "LADDR", "LPORT", - "RADDR", "RPORT", "OLDSTATE", "NEWSTATE", "MS"); + "RADDR", "RPORT", "OLDSTATE", "NEWSTATE", + nanoseconds ? "NS" : "MS"); while (!exiting) { err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS); diff --git a/libbpf-tools/tcpstates.h b/libbpf-tools/tcpstates.h index 31f2b61f7411..29566a755e17 100644 --- a/libbpf-tools/tcpstates.h +++ b/libbpf-tools/tcpstates.h @@ -10,7 +10,7 @@ struct event { unsigned __int128 daddr; __u64 skaddr; __u64 ts_us; - __u64 delta_us; + __u64 delta_ns; __u32 pid; int oldstate; int newstate;