diff --git a/ip/ipnetns.c b/ip/ipnetns.c index 14e8e087..3e96d267 100644 --- a/ip/ipnetns.c +++ b/ip/ipnetns.c @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #define _ATFILE_SOURCE +#include #include #include #include @@ -801,6 +802,7 @@ static int netns_add(int argc, char **argv, bool create) const char *name; pid_t pid; int fd; + int lock; int made_netns_run_dir_mount = 0; if (create) { @@ -831,12 +833,37 @@ static int netns_add(int argc, char **argv, bool create) * namespace file in one namespace will unmount the network namespace * file in all namespaces allowing the network namespace to be freed * sooner. + * These setup steps need to happen only once, as if multiple ip processes + * try to attempt the same operation at the same time, the mountpoints will + * be recursively created multiple times, eventually causing the system + * to lock up. For example, this has been observed when multiple netns + * namespaces are created in parallel at boot. See: + * https://bugs.debian.org/949235 + * Try to take an exclusive file lock on the top level directory to ensure + * this cannot happen, but proceed nonetheless if it cannot happen for any + * reason. */ + lock = open(NETNS_RUN_DIR, O_RDONLY|O_DIRECTORY, 0); + if (lock < 0) { + fprintf(stderr, "Cannot open netns runtime directory \"%s\": %s\n", + NETNS_RUN_DIR, strerror(errno)); + return -1; + } + if (flock(lock, LOCK_EX) < 0) { + fprintf(stderr, "Warning: could not flock netns runtime directory \"%s\": %s\n", + NETNS_RUN_DIR, strerror(errno)); + close(lock); + lock = -1; + } while (mount("", NETNS_RUN_DIR, "none", MS_SHARED | MS_REC, NULL)) { /* Fail unless we need to make the mount point */ if (errno != EINVAL || made_netns_run_dir_mount) { fprintf(stderr, "mount --make-shared %s failed: %s\n", NETNS_RUN_DIR, strerror(errno)); + if (lock != -1) { + flock(lock, LOCK_UN); + close(lock); + } return -1; } @@ -844,10 +871,18 @@ static int netns_add(int argc, char **argv, bool create) if (mount(NETNS_RUN_DIR, NETNS_RUN_DIR, "none", MS_BIND | MS_REC, NULL)) { fprintf(stderr, "mount --bind %s %s failed: %s\n", NETNS_RUN_DIR, NETNS_RUN_DIR, strerror(errno)); + if (lock != -1) { + flock(lock, LOCK_UN); + close(lock); + } return -1; } made_netns_run_dir_mount = 1; } + if (lock != -1) { + flock(lock, LOCK_UN); + close(lock); + } /* Create the filesystem state */ fd = open(netns_path, O_RDONLY|O_CREAT|O_EXCL, 0);