Move cgroup dbus requests to the child to avoid a race where a spawned child

that quickly forks will have only the parent process moved to the newly created
cgroup. From Daniel De Graaf, GitHub issue 4435.
This commit is contained in:
Nicholas Marriott
2025-04-14 07:26:05 +01:00
parent ac2779f4d8
commit f0a85d0469
3 changed files with 114 additions and 22 deletions

View File

@ -450,7 +450,7 @@ void *recallocarray(void *, size_t, size_t, size_t);
/* systemd.c */
int systemd_activated(void);
int systemd_create_socket(int, char **);
int systemd_move_pid_to_new_cgroup(pid_t, char **);
int systemd_move_to_new_cgroup(char **);
#endif
#ifdef HAVE_UTF8PROC

View File

@ -75,16 +75,49 @@ fail:
return (-1);
}
int
systemd_move_pid_to_new_cgroup(pid_t pid, char **cause)
struct systemd_job_watch {
const char *path;
int done;
};
static int
job_removed_handler(sd_bus_message *m, void *userdata, sd_bus_error *ret_error)
{
sd_bus_error error = SD_BUS_ERROR_NULL;
sd_bus_message *m = NULL, *reply = NULL;
sd_bus *bus = NULL;
char *name, *desc, *slice;
sd_id128_t uuid;
int r;
pid_t parent_pid;
struct systemd_job_watch *watch = userdata;
const char *path = NULL;
uint32_t id;
int r;
/* This handler could be called during the sd_bus_call. */
if (watch->path == NULL)
return 0;
r = sd_bus_message_read(m, "uo", &id, &path);
if (r < 0)
return (r);
if (strcmp(path, watch->path) == 0)
watch->done = 1;
return (0);
}
int
systemd_move_to_new_cgroup(char **cause)
{
sd_bus_error error = SD_BUS_ERROR_NULL;
sd_bus_message *m = NULL, *reply = NULL;
sd_bus *bus = NULL;
sd_bus_slot *slot = NULL;
char *name, *desc, *slice;
sd_id128_t uuid;
int r;
uint64_t elapsed_usec;
pid_t pid, parent_pid;
struct timeval start, now;
struct systemd_job_watch watch = {};
gettimeofday(&start, NULL);
/* Connect to the session bus. */
r = sd_bus_default_user(&bus);
@ -94,6 +127,20 @@ systemd_move_pid_to_new_cgroup(pid_t pid, char **cause)
goto finish;
}
/* Start watching for JobRemoved events */
r = sd_bus_match_signal(bus, &slot,
"org.freedesktop.systemd1",
"/org/freedesktop/systemd1",
"org.freedesktop.systemd1.Manager",
"JobRemoved",
job_removed_handler,
&watch);
if (r < 0) {
xasprintf(cause, "failed to create match signal: %s",
strerror(-r));
goto finish;
}
/* Start building the method call. */
r = sd_bus_message_new_method_call(bus, &m,
"org.freedesktop.systemd1",
@ -138,7 +185,8 @@ systemd_move_pid_to_new_cgroup(pid_t pid, char **cause)
goto finish;
}
parent_pid = getpid();
pid = getpid();
parent_pid = getppid();
xasprintf(&desc, "tmux child pane %ld launched by process %ld",
(long)pid, (long)parent_pid);
r = sd_bus_message_append(m, "(sv)", "Description", "s", desc);
@ -223,10 +271,55 @@ systemd_move_pid_to_new_cgroup(pid_t pid, char **cause)
goto finish;
}
/* Get the job (object path) from the reply */
r = sd_bus_message_read(reply, "o", &watch.path);
if (r < 0) {
xasprintf(cause, "failed to parse method reply: %s",
strerror(-r));
goto finish;
}
while (!watch.done) {
/* Process events including callbacks. */
r = sd_bus_process(bus, NULL);
if (r < 0) {
xasprintf(cause,
"failed waiting for cgroup allocation: %s",
strerror(-r));
goto finish;
}
/*
* A positive return means we handled an event and should keep
* processing; zero indicates no events available, so wait.
*/
if (r > 0)
continue;
gettimeofday(&now, NULL);
elapsed_usec = (now.tv_sec - start.tv_sec) * 1000000 +
now.tv_usec - start.tv_usec;
if (elapsed_usec >= 1000000) {
xasprintf(cause,
"timeout waiting for cgroup allocation");
goto finish;
}
r = sd_bus_wait(bus, 1000000 - elapsed_usec);
if (r < 0) {
xasprintf(cause,
"failed waiting for cgroup allocation: %s",
strerror(-r));
goto finish;
}
}
finish:
sd_bus_error_free(&error);
sd_bus_message_unref(m);
sd_bus_message_unref(reply);
sd_bus_slot_unref(slot);
sd_bus_unref(bus);
return (r);

21
spawn.c
View File

@ -382,20 +382,19 @@ spawn_pane(struct spawn_context *sc, char **cause)
/* In the parent process, everything is done now. */
if (new_wp->pid != 0) {
#if defined(HAVE_SYSTEMD) && defined(ENABLE_CGROUPS)
/*
* Move the child process into a new cgroup for systemd-oomd
* isolation.
*/
if (systemd_move_pid_to_new_cgroup(new_wp->pid, cause) < 0) {
log_debug("%s: moving pane to new cgroup failed: %s",
__func__, *cause);
free (*cause);
}
#endif
goto complete;
}
#if defined(HAVE_SYSTEMD) && defined(ENABLE_CGROUPS)
/*
* Move the child process into a new cgroup for systemd-oomd isolation.
*/
if (systemd_move_to_new_cgroup(cause) < 0) {
log_debug("%s: moving pane to new cgroup failed: %s",
__func__, *cause);
free (*cause);
}
#endif
/*
* Child process. Change to the working directory or home if that
* fails.