From d34cf29b3831637cd3c79045b68440d34c19eeec Mon Sep 17 00:00:00 2001
From: Daniel De Graaf <code@danieldg.net>
Date: Tue, 25 Mar 2025 22:32:19 -0400
Subject: [PATCH 1/2] Move cgroup dbus requests to the child

This avoids a race where a spawned child that quickly forks will have
only the parent process moved to the newly created cgroup, leaving the
early children in tmux's own cgroup. It also avoids problems if the
spawned process inspects or changes its own cgroup.
---
 compat.h         |  2 +-
 compat/systemd.c |  7 ++++---
 spawn.c          | 21 ++++++++++-----------
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/compat.h b/compat.h
index 93928603..bdc77513 100644
--- a/compat.h
+++ b/compat.h
@@ -450,7 +450,7 @@ void *recallocarray(void *, size_t, size_t, size_t);
 /* systemd.c */
 int systemd_activated(void);
 int systemd_create_socket(int, char **);
-int systemd_move_pid_to_new_cgroup(pid_t, char **);
+int systemd_move_to_new_cgroup(char **);
 #endif
 
 #ifdef HAVE_UTF8PROC
diff --git a/compat/systemd.c b/compat/systemd.c
index 22773c42..b3d51b81 100644
--- a/compat/systemd.c
+++ b/compat/systemd.c
@@ -76,7 +76,7 @@ fail:
 }
 
 int
-systemd_move_pid_to_new_cgroup(pid_t pid, char **cause)
+systemd_move_to_new_cgroup(char **cause)
 {
 	sd_bus_error error = SD_BUS_ERROR_NULL;
 	sd_bus_message *m = NULL, *reply = NULL;
@@ -84,7 +84,7 @@ systemd_move_pid_to_new_cgroup(pid_t pid, char **cause)
 	char *name, *desc, *slice;
 	sd_id128_t uuid;
 	int r;
-	pid_t parent_pid;
+	pid_t pid, parent_pid;
 
 	/* Connect to the session bus. */
 	r = sd_bus_default_user(&bus);
@@ -138,7 +138,8 @@ systemd_move_pid_to_new_cgroup(pid_t pid, char **cause)
 		goto finish;
 	}
 
-	parent_pid = getpid();
+	pid = getpid();
+	parent_pid = getppid();
 	xasprintf(&desc, "tmux child pane %ld launched by process %ld",
 	    (long)pid, (long)parent_pid);
 	r = sd_bus_message_append(m, "(sv)", "Description", "s", desc);
diff --git a/spawn.c b/spawn.c
index d321dba4..0342ea03 100644
--- a/spawn.c
+++ b/spawn.c
@@ -382,20 +382,19 @@ spawn_pane(struct spawn_context *sc, char **cause)
 
 	/* In the parent process, everything is done now. */
 	if (new_wp->pid != 0) {
-#if defined(HAVE_SYSTEMD) && defined(ENABLE_CGROUPS)
-		/*
-		 * Move the child process into a new cgroup for systemd-oomd
-		 * isolation.
-		 */
-		if (systemd_move_pid_to_new_cgroup(new_wp->pid, cause) < 0) {
-			log_debug("%s: moving pane to new cgroup failed: %s",
-			    __func__, *cause);
-			free (*cause);
-		}
-#endif
 		goto complete;
 	}
 
+#if defined(HAVE_SYSTEMD) && defined(ENABLE_CGROUPS)
+	/*
+	 * Move the child process into a new cgroup for systemd-oomd isolation.
+	 */
+	if (systemd_move_to_new_cgroup(cause) < 0) {
+		log_debug("%s: moving pane to new cgroup failed: %s",
+		    __func__, *cause);
+		free (*cause);
+	}
+#endif
 	/*
 	 * Child process. Change to the working directory or home if that
 	 * fails.
From 289eb5ccd9c417533694fbd0f6ad8f3fa3bf8b9b Mon Sep 17 00:00:00 2001
From: Daniel De Graaf <code@danieldg.net>
Date: Wed, 26 Mar 2025 19:54:46 -0400
Subject: [PATCH 2/2] Wait for the cgroup request job to complete

The StartTransientUnit call returns as soon as the job is enqueued, but
does not wait for systemd to actually do the work. Wait for the job
completion signal before continuing to exec, or until 1 second has
passed.
---
 compat/systemd.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)

diff --git a/compat/systemd.c b/compat/systemd.c
index b3d51b81..15f33aad 100644
--- a/compat/systemd.c
+++ b/compat/systemd.c
@@ -75,16 +75,59 @@ fail:
 	return (-1);
 }
 
+/* State shared between systemd_move_to_new_cgroup and its signal handler. */
+struct job_watch {
+	const char* path;	/* job object path to wait for; NULL until read */
+	int done;		/* set to 1 once a matching JobRemoved is seen */
+};
+
+/*
+ * JobRemoved signal callback. Marks the watch done once the removed job's
+ * object path matches watch->path; signals seen before the path is known are
+ * ignored. Returns 0, or a negative value if the message cannot be read.
+ */
+static int
+job_removed_handler(sd_bus_message *m, void *userdata, sd_bus_error *ret_error)
+{
+	struct job_watch *watch = userdata;
+	const char* path = NULL;
+	uint32_t id;
+	int r;
+	(void)ret_error;
+
+	/* This handler could be called during sd_bus_call. */
+	if (watch->path == NULL) {
+		return (0);
+	}
+
+	r = sd_bus_message_read(m, "uo", &id, &path);
+	if (r < 0) {
+		return (r);
+	}
+
+	if (strcmp(path, watch->path) == 0) {
+		watch->done = 1;
+	}
+
+	return (0);
+}
+
 int
 systemd_move_to_new_cgroup(char **cause)
 {
 	sd_bus_error error = SD_BUS_ERROR_NULL;
 	sd_bus_message *m = NULL, *reply = NULL;
 	sd_bus *bus = NULL;
+	sd_bus_slot *slot = NULL;
 	char *name, *desc, *slice;
 	sd_id128_t uuid;
 	int r;
+	uint64_t elapsed_usec;
 	pid_t pid, parent_pid;
+	struct job_watch watch = { NULL, 0 };
+	struct timeval start, now;
+
+	gettimeofday(&start, NULL);
 
 	/* Connect to the session bus. */
 	r = sd_bus_default_user(&bus);
@@ -94,6 +137,20 @@ systemd_move_to_new_cgroup(char **cause)
 		goto finish;
 	}
 
+	/* Start watching for JobRemoved events */
+	r = sd_bus_match_signal(bus, &slot,
+	    "org.freedesktop.systemd1",
+	    "/org/freedesktop/systemd1",
+	    "org.freedesktop.systemd1.Manager",
+	    "JobRemoved",
+	    job_removed_handler,
+	    &watch);
+	if (r < 0) {
+		xasprintf(cause, "failed to create match signal: %s",
+		    strerror(-r));
+		goto finish;
+	}
+
 	/* Start building the method call. */
 	r = sd_bus_message_new_method_call(bus, &m,
 	    "org.freedesktop.systemd1",
@@ -224,10 +281,54 @@ systemd_move_to_new_cgroup(char **cause)
 		goto finish;
 	}
 
+	/* Get the job (object path) from the reply */
+	r = sd_bus_message_read(reply, "o", &watch.path);
+	if (r < 0) {
+		xasprintf(cause, "failed to parse method reply: %s",
+		    strerror(-r));
+		goto finish;
+	}
+
+	while (!watch.done) {
+		/* Process events, invoking callbacks that may set watch.done */
+		r = sd_bus_process(bus, NULL);
+		if (r < 0) {
+			xasprintf(cause, "failed waiting for cgroup allocation: %s",
+			    strerror(-r));
+			goto finish;
+		}
+
+		/* A zero return means we should wait for events */
+		if (r != 0) {
+			continue;
+		}
+
+		gettimeofday(&now, NULL);
+		elapsed_usec = (now.tv_sec - start.tv_sec) * 1000000 + now.tv_usec - start.tv_usec;
+
+		if (elapsed_usec >= 1000000) {
+			/*
+			 * r is 0 here, so set an error or the caller will
+			 * treat the timeout as success and leak *cause.
+			 */
+			xasprintf(cause, "timeout waiting for cgroup allocation");
+			r = -1;
+			goto finish;
+		}
+
+		r = sd_bus_wait(bus, 1000000 - elapsed_usec);
+		if (r < 0) {
+			xasprintf(cause, "failed waiting for cgroup allocation: %s",
+			    strerror(-r));
+			goto finish;
+		}
+	}
+
 finish:
 	sd_bus_error_free(&error);
 	sd_bus_message_unref(m);
 	sd_bus_message_unref(reply);
+	sd_bus_slot_unref(slot);
 	sd_bus_unref(bus);
 
 	return (r);