Submitted By: Douglas R. Reno Date: 2021-07-22 Updated Date: 2022-01-13 Initial Package Version: 249 Origin: Upstream (https://github.com/systemd/systemd/pull/20256/) Upstream Status: Applied Description: This patch guards systemd-249 against CVE-2021-33910. A similar patch will be provided for systemd-246 (LFS 10.0) and systemd-247 (LFS 10.1). The vulnerability is described as "an attacker-controlled alloc() leads to a crash in systemd and ultimately a kernel panic". This vulnerability pertains to the FUSE filesystem layer in the kernel, and depends on the patch in the kernel for CVE-2021-33909. A local attacker who is able to mount a filesystem with a very long path will be able to crash the entire system. It is also possible for this vulnerability to be exploited upon mounting, or automounting (such as via GVFS in XFCE/GNOME), an NTFS or SSHFS filesystem, since those filesystems use FUSE. The vulnerability triggers when reading /proc/self/mountinfo, which systemd does automatically upon a filesystem being mounted. The latest version of this patch (-2) includes a fix for CVE-2021-20316, which is an uncontrolled recursion bug in systemd's systemd-tmpfiles. It also includes a fix for systems with multiple network cards. diff -Naurp systemd-249.orig/src/basic/unit-name.c systemd-249/src/basic/unit-name.c --- systemd-249.orig/src/basic/unit-name.c 2021-07-07 12:41:29.000000000 -0500 +++ systemd-249/src/basic/unit-name.c 2022-01-13 10:02:19.848678516 -0600 @@ -378,12 +378,13 @@ int unit_name_unescape(const char *f, ch } int unit_name_path_escape(const char *f, char **ret) { - char *p, *s; + _cleanup_free_ char *p = NULL; + char *s; assert(f); assert(ret); - p = strdupa(f); + p = strdup(f); if (!p) return -ENOMEM; @@ -395,13 +396,10 @@ int unit_name_path_escape(const char *f, if (!path_is_normalized(p)) return -EINVAL; - /* Truncate trailing slashes */ + /* Truncate trailing slashes and skip leading slashes*/ delete_trailing_chars(p, "/"); - /* Truncate leading slashes */ - p = skip_leading_chars(p, "/"); - - s = unit_name_escape(p); + s = unit_name_escape(skip_leading_chars(p, "/")); } if (!s) return -ENOMEM; diff -Naurp systemd-249.orig/src/network/networkd-link.c systemd-249/src/network/networkd-link.c --- systemd-249.orig/src/network/networkd-link.c 2021-07-07 12:41:29.000000000 -0500 +++ systemd-249/src/network/networkd-link.c 2022-01-13 10:09:26.714963104 -0600 @@ -1401,17 +1401,25 @@ static int link_initialized(Link *link, assert(link); assert(device); - if (link->state != LINK_STATE_PENDING) - return 0; + /* + * Always replace with the new sd_device object. As the sysname (and possibly other properties + * or sysattrs) may be outdated. + */ + sd_device_ref(device); + sd_device_unref(link->sd_device); + link->sd_device = device; - if (link->sd_device) + /* + * Do not ignore unmanaged state case here. If an interface is renamed after being once + * configured, and the corresponding .network file has Name= in the [Match] section, then the + * interface may already be in unmanaged state. See #20657. + */ + if (!IN_SET(link->state, LINK_STATE_PENDING, LINK_STATE_UNMANAGED)) return 0; log_link_debug(link, "udev initialized link"); link_set_state(link, LINK_STATE_INITIALIZED); - link->sd_device = sd_device_ref(device); - /* udev has initialized the link, but we don't know if we have yet * processed the NEWLINK messages with the latest state. Do a GETLINK, * when it returns we know that the pending NEWLINKs have already been diff -Naurp systemd-249.orig/src/shared/rm-rf.c systemd-249/src/shared/rm-rf.c --- systemd-249.orig/src/shared/rm-rf.c 2021-07-07 12:41:29.000000000 -0500 +++ systemd-249/src/shared/rm-rf.c 2022-01-13 11:10:35.943655659 -0600 @@ -19,6 +19,11 @@ #include "stat-util.h" #include "string-util.h" + + /* We treat tmpfs/ramfs + cgroupfs as non-physical filesystems. cgroupfs is similar to tmpfs in a way after + * all: we can create arbitrary directory hierarchies in it, and hence can also use rm_rf() on it to remove + * those again.*/ + static bool is_physical_fs(const struct statfs *sfs) { return !is_temporary_fs(sfs) && !is_cgroup_fs(sfs); } @@ -49,7 +54,6 @@ static int patch_dirfd_mode( } int unlinkat_harder(int dfd, const char *filename, int unlink_flags, RemoveFlags remove_flags) { - mode_t old_mode; int r; @@ -113,133 +117,204 @@ int fstatat_harder(int dfd, return 0; } -int rm_rf_children(int fd, RemoveFlags flags, struct stat *root_dev) { - _cleanup_closedir_ DIR *d = NULL; - struct dirent *de; - int ret = 0, r; - struct statfs sfs; - - assert(fd >= 0); +static int rm_rf_inner_child( + int fd, + const char *fname, + int is_dir, + RemoveFlags flags, + const struct stat *root_dev, + bool allow_recursion) { - /* This returns the first error we run into, but nevertheless tries to go on. This closes the passed - * fd, in all cases, including on failure.. */ + struct stat st; + int r, q = 0; - if (!(flags & REMOVE_PHYSICAL)) { + assert(fd >= 0); + assert(fname); - r = fstatfs(fd, &sfs); - if (r < 0) { - safe_close(fd); - return -errno; - } + if (is_dir < 0 || + root_dev || + (is_dir > 0 && (root_dev || (flags & REMOVE_SUBVOLUME)))) { - if (is_physical_fs(&sfs)) { - /* We refuse to clean physical file systems with this call, - * unless explicitly requested. This is extra paranoia just - * to be sure we never ever remove non-state data. */ - _cleanup_free_ char *path = NULL; - - (void) fd_get_path(fd, &path); - log_error("Attempted to remove disk file system under \"%s\", and we can't allow that.", - strna(path)); + r = fstatat_harder(fd, fname, &st, AT_SYMLINK_NOFOLLOW, flags); + if (r < 0) + return r; - safe_close(fd); - return -EPERM; - } + is_dir = S_ISDIR(st.st_mode); } - d = fdopendir(fd); - if (!d) { - safe_close(fd); - return errno == ENOENT ? 0 : -errno; - } + if (is_dir) { + /* if root_dev is set, remove subdirectories only if device is same */ + if (root_dev && st.st_dev != root_dev->st_dev) + return 0; + + /* stop at mount points */ + r = fd_is_mount_point(fd, fname, 0); + if (r < 0) + return r; + if (r > 0) + return 0; + + if ((flags & REMOVE_SUBVOLUME) && btrfs_might_be_subvol(&st)) { + /* This could be a subvolume, try to remove it */ - FOREACH_DIRENT_ALL(de, d, return -errno) { - bool is_dir; - struct stat st; - - if (dot_or_dot_dot(de->d_name)) - continue; - - if (de->d_type == DT_UNKNOWN || - (de->d_type == DT_DIR && (root_dev || (flags & REMOVE_SUBVOLUME)))) { - r = fstatat_harder(fd, de->d_name, &st, AT_SYMLINK_NOFOLLOW, flags); + r = btrfs_subvol_remove_fd(fd, fname, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA); if (r < 0) { - if (ret == 0 && r != -ENOENT) - ret = r; - continue; - } + if (!IN_SET(r, -ENOTTY, -EINVAL)) + return r; - is_dir = S_ISDIR(st.st_mode); - } else - is_dir = de->d_type == DT_DIR; - - if (is_dir) { - _cleanup_close_ int subdir_fd = -1; - - /* if root_dev is set, remove subdirectories only if device is same */ - if (root_dev && st.st_dev != root_dev->st_dev) - continue; - - subdir_fd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME); - if (subdir_fd < 0) { - if (ret == 0 && errno != ENOENT) - ret = -errno; - continue; + /* ENOTTY, then it wasn't a btrfs subvolume. continue below. */ + } else + /* It was a subvolume, done. */ + return 1; } - /* Stop at mount points */ - r = fd_is_mount_point(fd, de->d_name, 0); - if (r < 0) { - if (ret == 0 && r != -ENOENT) - ret = r; + if (!allow_recursion) + return -EISDIR; + + int subdir_fd = openat(fd, fname, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME); + if (subdir_fd < 0) + return -errno; + + /* We pass REMOVE_PHYSICAL here, to avoid doing the fstatfs() to check the file system type + * again for each directory */ + q = rm_rf_children(subdir_fd, flags | REMOVE_PHYSICAL, root_dev); + } else if (flags & REMOVE_ONLY_DIRECTORIES) + return 0; - continue; - } - if (r > 0) - continue; + r = unlinkat_harder(fd, fname, is_dir ? AT_REMOVEDIR : 0, flags); + if (r < 0) + return r; + if (q < 0) + return q; + return 1; +} - if ((flags & REMOVE_SUBVOLUME) && btrfs_might_be_subvol(&st)) { +typedef struct TodoEntry { + DIR *dir; /* A directory that we were operating on. */ + char *dirname; /* The filename of the directory itself. */ +} TodoEntry; + +static void free_todo_entries(TodoEntry **todos) { + for (TodoEntry *x = *todos; x && x->dir; x++) { + closedir(x->dir); + free(x->dirname); + } - /* This could be a subvolume, try to remove it */ + freep(todos); +} - r = btrfs_subvol_remove_fd(fd, de->d_name, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA); - if (r < 0) { - if (!IN_SET(r, -ENOTTY, -EINVAL)) { - if (ret == 0) - ret = r; - - continue; - } - - /* ENOTTY, then it wasn't a btrfs subvolume, continue below. */ - } else - /* It was a subvolume, continue. */ - continue; - } +int rm_rf_children( + int fd, + RemoveFlags flags, + const struct stat *root_dev) { + + _cleanup_(free_todo_entries) TodoEntry *todos = NULL; + size_t n_todo = 0; + _cleanup_free_ char *dirname = NULL; /* Set when we are recursing and want to delete ourselves */ + int ret = 0, r; + + /* Return the first error we run into, but nevertheless try to go on. + * The passed fd is closed in all cases, including on failure. */ + + for (;;) { /* This loop corresponds to the directory nesting level. */ + _cleanup_closedir_ DIR *d = NULL; + + if (n_todo > 0) { + /* We know that we are in recursion here, because n_todo is set. + * We need to remove the inner directory we were operating on. */ + assert(dirname); + r = unlinkat_harder(dirfd(todos[n_todo-1].dir), dirname, AT_REMOVEDIR, flags); + if (r < 0 && r != -ENOENT && ret == 0) + ret = r; + dirname = mfree(dirname); + + /* And now let's back out one level up */ + n_todo --; + d = TAKE_PTR(todos[n_todo].dir); + dirname = TAKE_PTR(todos[n_todo].dirname); + + assert(d); + fd = dirfd(d); /* Retreive the file descriptor from the DIR object. */ + assert(fd >= 0); + } else { +next_fd: + assert(fd >= 0); + d = fdopendir(fd); + if (!d) { + safe_close(fd); + return -errno; + } + fd = dirfd(d); /* We donated the fd to fdopendir(). Let's make sure that we have + the right descriptor even if it were to internally invalidate the + one we passed. */ + + if (!(flags & REMOVE_PHYSICAL)) { + struct statfs sfs; + + if (fstatfs(fd, &sfs) < 0) + return -errno; + + if (is_physical_fs(&sfs)) { + /* We refuse to clean physical filesystems with this call, unless explicitly + * requested. This is extra paranoia just to be sure we can + * never ever remove non-state data. */ + + _cleanup_free_ char *path = NULL; + + (void) fd_get_path(fd, &path); + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "Attempted to remove disk file system under \"%s\", and we can't allow that.", + strna(path)); + } + } + } + + struct dirent *de; + FOREACH_DIRENT_ALL(de, d, return -errno) { + int is_dir; + + if (dot_or_dot_dot(de->d_name)) + continue; + + is_dir = de->d_type == DT_UNKNOWN ? -1 : de->d_type == DT_DIR; + + r = rm_rf_inner_child(fd, de->d_name, is_dir, flags, root_dev, false); + if (r == -EISDIR) { + /* Push the current working state onto the todo list */ + + if (!GREEDY_REALLOC0(todos, n_todo + 2)) + return log_oom(); + + _cleanup_free_ char *newdirname = strdup(de->d_name); + if (!newdirname) + return log_oom(); + + int newfd = openat(fd, de->d_name, + O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME); + if (newfd >= 0) { + todos[n_todo++] = (TodoEntry) { TAKE_PTR(d), TAKE_PTR(dirname) }; + fd = newfd; + dirname = TAKE_PTR(newdirname); + + goto next_fd; + } else if (errno != -ENOENT && ret == 0) + ret = -errno; + } else if (r < 0 && r != -ENOENT && ret == 0) + ret = r; + } + + if (FLAGS_SET(flags, REMOVE_SYNCFS) && syncfs(fd) < 0 && ret >= 0) + ret = -errno; + + if (n_todo == 0) + break; + } - /* We pass REMOVE_PHYSICAL here, to avoid doing the fstatfs() to check the file - * system type again for each directory */ - r = rm_rf_children(TAKE_FD(subdir_fd), flags | REMOVE_PHYSICAL, root_dev); - if (r < 0 && ret == 0) - ret = r; - - r = unlinkat_harder(fd, de->d_name, AT_REMOVEDIR, flags); - if (r < 0 && r != -ENOENT && ret == 0) - ret = r; - - } else if (!(flags & REMOVE_ONLY_DIRECTORIES)) { - - r = unlinkat_harder(fd, de->d_name, 0, flags); - if (r < 0 && r != -ENOENT && ret == 0) - ret = r; - } - } - return ret; + return ret; } int rm_rf(const char *path, RemoveFlags flags) { - int fd, r; - struct statfs s; + int fd, r, q = 0; assert(path); @@ -271,46 +346,62 @@ int rm_rf(const char *path, RemoveFlags } fd = open(path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME); - if (fd < 0) { + if (fd >= 0) { + /* We have a dir */ + + r = rm_rf_children(fd, flags, NULL); + + if (FLAGS_SET(flags, REMOVE_ROOT) && rmdir(path) < 0) + q = -errno; + } else { if (FLAGS_SET(flags, REMOVE_MISSING_OK) && errno == ENOENT) return 0; if (!IN_SET(errno, ENOTDIR, ELOOP)) return -errno; - if (FLAGS_SET(flags, REMOVE_ONLY_DIRECTORIES)) + if (FLAGS_SET(flags, REMOVE_ONLY_DIRECTORIES) || !FLAGS_SET(flags, REMOVE_ROOT)) return 0; - if (FLAGS_SET(flags, REMOVE_ROOT)) { + if (!FLAGS_SET(flags, REMOVE_PHYSICAL)) { + struct statfs s; - if (!FLAGS_SET(flags, REMOVE_PHYSICAL)) { - if (statfs(path, &s) < 0) - return -errno; - - if (is_physical_fs(&s)) - return log_error_errno(SYNTHETIC_ERRNO(EPERM), - "Attempted to remove files from a disk file system under \"%s\", refusing.", - path); - } - - if (unlink(path) < 0) { - if (FLAGS_SET(flags, REMOVE_MISSING_OK) && errno == ENOENT) - return 0; - - return -errno; - } + if (statfs(path, &s) < 0) + return -errno; + + if (is_physical_fs(&s)) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "Attempted to remove files from a disk file system under \"%s\", refusing.", + path); } - return 0; + r = 0; + if (unlink(path) < 0) + q = -errno; } - r = rm_rf_children(fd, flags, NULL); + if (r < 0) + return r; + if (q < 0 && (q != -ENOENT || !FLAGS_SET(flags, REMOVE_MISSING_OK))) + return q; + return 0; +} - if (FLAGS_SET(flags, REMOVE_ROOT) && - rmdir(path) < 0 && - r >= 0 && - (!FLAGS_SET(flags, REMOVE_MISSING_OK) || errno != ENOENT)) - r = -errno; +int rm_rf_child(int fd, const char *name, RemoveFlags flags) { + + /* Removes one specific child of the specified directory */ + + if (fd < 0) + return -EBADF; + + if (!filename_is_valid(name)) + return -EINVAL; + + if ((flags & (REMOVE_ROOT|REMOVE_MISSING_OK)) != 0) /* Doesn't really make sense here, we are not supposed to remove 'fd' anyway */ + return -EINVAL; + + if (FLAGS_SET(flags, REMOVE_ONLY_DIRECTORIES|REMOVE_SUBVOLUME)) + return -EINVAL; - return r; + return rm_rf_inner_child(fd, name, -1, flags, NULL, true); } diff -Naurp systemd-249.orig/src/shared/rm-rf.h systemd-249/src/shared/rm-rf.h --- systemd-249.orig/src/shared/rm-rf.h 2021-07-07 12:41:29.000000000 -0500 +++ systemd-249/src/shared/rm-rf.h 2022-01-13 10:41:44.908373789 -0600 @@ -14,6 +14,7 @@ typedef enum RemoveFlags { REMOVE_MISSING_OK = 1 << 4, /* If the top-level directory is missing, ignore the ENOENT for it */ REMOVE_CHMOD = 1 << 5, /* chmod() for write access if we cannot delete or access something */ REMOVE_CHMOD_RESTORE = 1 << 6, /* Restore the old mode before returning */ + REMOVE_SYNCFS = 1 << 7, /* syncfs() the root of the specified directory after removing everything in it */ } RemoveFlags; int unlinkat_harder(int dfd, const char *filename, int unlink_flags, RemoveFlags remove_flags); @@ -23,7 +24,8 @@ int fstatat_harder(int dfd, int fstatat_flags, RemoveFlags remove_flags); -int rm_rf_children(int fd, RemoveFlags flags, struct stat *root_dev); +int rm_rf_children(int fd, RemoveFlags flags, const struct stat *root_dev); +int rm_rf_child(int fd, const char *name, RemoveFlags flags); int rm_rf(const char *path, RemoveFlags flags); /* Useful for usage with _cleanup_(), destroys a directory and frees the pointer */