From d34cf29b3831637cd3c79045b68440d34c19eeec Mon Sep 17 00:00:00 2001
From: Daniel De Graaf <code@danieldg.net>
Date: Tue, 25 Mar 2025 22:32:19 -0400
Subject: [PATCH 1/2] Move cgroup dbus requests to the child

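Previously tmux asked systemd to move the pane's process into a new
cgroup from the parent, after forking it.  That raced with the child: if
the spawned process forked quickly, only the process tmux itself spawned
was moved, leaving its early children in tmux's own cgroup.  Making the
request from the child instead avoids the race, and also avoids problems
if the spawned process inspects or changes its own cgroup.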
---
 compat.h         |  2 +-
 compat/systemd.c |  7 ++++---
 spawn.c          | 21 ++++++++++-----------
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/compat.h b/compat.h
index 93928603..bdc77513 100644
--- a/compat.h
+++ b/compat.h
@@ -450,7 +450,7 @@ void		*recallocarray(void *, size_t, size_t, size_t);
 /* systemd.c */
 int		 systemd_activated(void);
 int		 systemd_create_socket(int, char **);
-int		 systemd_move_pid_to_new_cgroup(pid_t, char **);
+int		 systemd_move_to_new_cgroup(char **);
 #endif
 
 #ifdef HAVE_UTF8PROC
diff --git a/compat/systemd.c b/compat/systemd.c
index 22773c42..b3d51b81 100644
--- a/compat/systemd.c
+++ b/compat/systemd.c
@@ -76,7 +76,7 @@ fail:
 }
 
 int
-systemd_move_pid_to_new_cgroup(pid_t pid, char **cause)
+systemd_move_to_new_cgroup(char **cause)
 {
 	sd_bus_error	 error = SD_BUS_ERROR_NULL;
 	sd_bus_message	*m = NULL, *reply = NULL;
@@ -84,7 +84,7 @@ systemd_move_pid_to_new_cgroup(pid_t pid, char **cause)
 	char		*name, *desc, *slice;
 	sd_id128_t	 uuid;
 	int		 r;
-	pid_t		 parent_pid;
+	pid_t		 pid, parent_pid;
 
 	/* Connect to the session bus. */
 	r = sd_bus_default_user(&bus);
@@ -138,7 +138,8 @@ systemd_move_pid_to_new_cgroup(pid_t pid, char **cause)
 		goto finish;
 	}
 
-	parent_pid = getpid();
+	pid = getpid();
+	parent_pid = getppid();
 	xasprintf(&desc, "tmux child pane %ld launched by process %ld",
 	    (long)pid, (long)parent_pid);
 	r = sd_bus_message_append(m, "(sv)", "Description", "s", desc);
diff --git a/spawn.c b/spawn.c
index d321dba4..0342ea03 100644
--- a/spawn.c
+++ b/spawn.c
@@ -382,20 +382,19 @@ spawn_pane(struct spawn_context *sc, char **cause)
 
 	/* In the parent process, everything is done now. */
 	if (new_wp->pid != 0) {
-#if defined(HAVE_SYSTEMD) && defined(ENABLE_CGROUPS)
-		/*
-		 * Move the child process into a new cgroup for systemd-oomd
-		 * isolation.
-		 */
-		if (systemd_move_pid_to_new_cgroup(new_wp->pid, cause) < 0) {
-			log_debug("%s: moving pane to new cgroup failed: %s",
-			    __func__, *cause);
-			free (*cause);
-		}
-#endif
 		goto complete;
 	}
 
+#if defined(HAVE_SYSTEMD) && defined(ENABLE_CGROUPS)
+	/*
+	 * Move the child process into a new cgroup for systemd-oomd isolation.
+	 */
+	if (systemd_move_to_new_cgroup(cause) < 0) {
+		log_debug("%s: moving pane to new cgroup failed: %s",
+		    __func__, *cause);
+		free (*cause);
+	}
+#endif
 	/*
 	 * Child process. Change to the working directory or home if that
 	 * fails.

From 289eb5ccd9c417533694fbd0f6ad8f3fa3bf8b9b Mon Sep 17 00:00:00 2001
From: Daniel De Graaf <code@danieldg.net>
Date: Wed, 26 Mar 2025 19:54:46 -0400
Subject: [PATCH 2/2] Wait for the cgroup request job to complete

The StartTransientUnit call returns as soon as the job is enqueued; it
does not wait for systemd to actually do the work.  Before continuing to
exec, wait for the JobRemoved signal for that job, giving up once 1
second has passed since the request was started.
---
 compat/systemd.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/compat/systemd.c b/compat/systemd.c
index b3d51b81..15f33aad 100644
--- a/compat/systemd.c
+++ b/compat/systemd.c
@@ -75,16 +75,53 @@ fail:
 	return (-1);
 }
 
+struct job_watch {
+	const char	*path;	/* job object path to match; NULL until known */
+	int		 done;	/* set to 1 once JobRemoved names that path */
+};
+
+/*
+ * sd-bus match callback for the systemd Manager's JobRemoved signal.  Sets
+ * watch->done once the signal names the job path stored in watch->path.
+ * Returns 0, or the negative value from sd_bus_message_read on failure.
+ */
+static int
+job_removed_handler(sd_bus_message *m, void *userdata, sd_bus_error *ret_error)
+{
+	struct job_watch	*watch = userdata;
+	const char		*path = NULL;
+	uint32_t		 id;
+	int			 r;
+	(void)ret_error;
+
+	/* May run during sd_bus_call, before the job path is known. */
+	if (watch->path == NULL)
+		return (0);
+
+	r = sd_bus_message_read(m, "uo", &id, &path);
+	if (r < 0)
+		return (r);
+	if (strcmp(path, watch->path) == 0)
+		watch->done = 1;
+	return (0);
+}
+
 int
 systemd_move_to_new_cgroup(char **cause)
 {
 	sd_bus_error	 error = SD_BUS_ERROR_NULL;
 	sd_bus_message	*m = NULL, *reply = NULL;
 	sd_bus 		*bus = NULL;
+	sd_bus_slot	*slot = NULL;
 	char		*name, *desc, *slice;
 	sd_id128_t	 uuid;
 	int		 r;
+	uint64_t	 elapsed_usec;
 	pid_t		 pid, parent_pid;
+	struct job_watch watch = { .path = NULL, .done = 0 };
+	struct timeval	 start, now;
+
+	gettimeofday(&start, NULL);
 
 	/* Connect to the session bus. */
 	r = sd_bus_default_user(&bus);
@@ -94,6 +131,20 @@ systemd_move_to_new_cgroup(char **cause)
 		goto finish;
 	}
 
+	/* Start watching for JobRemoved events */
+	r = sd_bus_match_signal(bus, &slot,
+	    "org.freedesktop.systemd1",
+	    "/org/freedesktop/systemd1",
+	    "org.freedesktop.systemd1.Manager",
+	    "JobRemoved",
+	    job_removed_handler,
+	    &watch);
+	if (r < 0) {
+		xasprintf(cause, "failed to create match signal: %s",
+		    strerror(-r));
+		goto finish;
+	}
+
 	/* Start building the method call. */
 	r = sd_bus_message_new_method_call(bus, &m,
 	    "org.freedesktop.systemd1",
@@ -224,10 +275,49 @@ systemd_move_to_new_cgroup(char **cause)
 		goto finish;
 	}
 
+	/* Get the job (object path) from the reply */
+	r = sd_bus_message_read(reply, "o", &watch.path);
+	if (r < 0) {
+		xasprintf(cause, "failed to parse method reply: %s",
+		    strerror(-r));
+		goto finish;
+	}
+
+	while (!watch.done) {
+		/* Process events, invoking callbacks that may set watch.done */
+		r = sd_bus_process(bus, NULL);
+		if (r < 0) {
+			xasprintf(cause, "failed waiting for cgroup allocation: %s",
+			    strerror(-r));
+			goto finish;
+		}
+		/* Non-zero means something was handled; look for more first. */
+		if (r != 0)
+			continue;
+
+		gettimeofday(&now, NULL);
+		elapsed_usec = (now.tv_sec - start.tv_sec) * 1000000 +
+		    now.tv_usec - start.tv_usec;
+		if (elapsed_usec >= 1000000) {
+			/* r is 0 here, which would report success. */
+			xasprintf(cause, "timeout waiting for cgroup allocation");
+			r = -1;
+			goto finish;
+		}
+
+		r = sd_bus_wait(bus, 1000000 - elapsed_usec);
+		if (r < 0) {
+			xasprintf(cause, "failed waiting for cgroup allocation: %s",
+			    strerror(-r));
+			goto finish;
+		}
+	}
+
 finish:
 	sd_bus_error_free(&error);
 	sd_bus_message_unref(m);
 	sd_bus_message_unref(reply);
+	sd_bus_slot_unref(slot);
 	sd_bus_unref(bus);
 
 	return (r);