// minerva/Kernel/FileSystem/VFSRootContext.cpp
/*
 * Copyright (c) 2024, Liav A. <liavalb@hotmail.co.il>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <Kernel/FileSystem/RAMFS/FileSystem.h>
#include <Kernel/FileSystem/VFSRootContext.h>
#include <Kernel/FileSystem/VirtualFileSystem.h>

namespace Kernel {

// Singleton context (and its root custody) used by kernel processes. Both are
// intentionally leaked during boot (see initialize_empty_ramfs_root_context_for_kernel_processes())
// so they remain valid for the lifetime of the kernel, hence plain RawPtrs here.
static RawPtr<VFSRootContext> s_empty_context;
static RawPtr<Custody> s_empty_context_custody;
// Monotonic counter used to hand out a unique id to every VFSRootContext.
static Atomic<u64> s_vfs_root_context_id = 0;

// One-time boot initialization: create the VFSRootContext (backed by an empty
// RAMFS) that kernel processes use, and cache raw pointers to it and its root
// custody so later lookups don't need to go through RefPtr machinery.
UNMAP_AFTER_INIT void VFSRootContext::initialize_empty_ramfs_root_context_for_kernel_processes()
{
    // NOTE: leak_ref() is deliberate - this context must never be destroyed.
    s_empty_context = &MUST(VFSRootContext::create_with_empty_ramfs()).leak_ref();
    // NOTE: This custody is immutable, so we expose it also outside of the SpinlockProtected
    // template so it could be accessed immediately.
    s_empty_context_custody = &s_empty_context->root_custody().with([](auto& custody) -> NonnullRefPtr<Custody> { return *custody; }).leak_ref();

    // NOTE: We remove the context from the vfs root contexts list because
    // we leaked a ref, and this context is artificially created only for
    // kernel processes.
    dbgln("VFSRootContext({}): Context is artificially made, detach from global list", s_empty_context->id());
    VFSRootContext::all_root_contexts_list().with([&](auto& list) {
        list.remove(*s_empty_context);
    });
}

// Construct a context rooted at `custody`, assigning a unique, monotonically
// increasing id. The NonnullRefPtr is a sink parameter, so move it into place
// instead of copying it - this avoids an unnecessary atomic ref-count
// increment/decrement pair.
VFSRootContext::VFSRootContext(NonnullRefPtr<Custody> custody)
    : m_root_custody(move(custody))
    , m_id(s_vfs_root_context_id.fetch_add(1))
{
}

// Accessor for the leaked kernel-processes context; must only be called after
// initialize_empty_ramfs_root_context_for_kernel_processes() (VERIFYed here).
VFSRootContext const& VFSRootContext::empty_context_for_kernel_processes()
{
    VERIFY(s_empty_context);
    return *s_empty_context;
}

// Accessor for the cached root custody of the kernel-processes context; must
// only be called after boot-time initialization (VERIFYed here).
Custody const& VFSRootContext::empty_context_custody_for_kernel_processes()
{
    VERIFY(s_empty_context_custody);
    return *s_empty_context_custody;
}

// Invoke `callback` on every mount in this context (under the details lock),
// stopping and propagating the first error the callback returns.
ErrorOr<void> VFSRootContext::for_each_mount(Function<ErrorOr<void>(Mount const&)> callback) const
{
    return m_details.with([&callback](auto& state) -> ErrorOr<void> {
        for (auto& entry : state.mounts)
            TRY(callback(entry));
        return {};
    });
}

// Append `new_mount` to `mounts_list` and bump the guest filesystem's mounted
// count. When this is the very first mount of the filesystem (count becomes 1),
// also register the filesystem in the global file systems list; a bind mount is
// never allowed to be a filesystem's first mount (VERIFYed below).
void VFSRootContext::add_to_mounts_list_and_increment_fs_mounted_count(DoBindMount do_bind_mount, IntrusiveList<&Mount::m_vfs_list_node>& mounts_list, NonnullOwnPtr<Mount> new_mount)
{
    new_mount->guest_fs().mounted_count().with([&](auto& mounted_count) {
        // NOTE: We increment the mounted counter for the given filesystem regardless of the mount type,
        // as a bind mount also counts as a normal mount from the perspective of unmount(),
        // so we need to keep track of it in order for prepare_to_clear_last_mount() to work properly.
        mounted_count++;

        // NOTE: Add the filesystem to the file systems list if it's not a bind mount (this
        // condition is VERIFYed within the if-case) and this is the first time this FileSystem is mounted.
        // This is symmetric with VirtualFileSystem::unmount()'s `remove()` calls (which remove
        // the FileSystem once it is no longer mounted).
        if (mounted_count == 1) {
            // NOTE: If the mounted_count is 1, and we try to do a bind-mount on an inode
            // from this filesystem this means we have a bug because it's expected that
            // we will always have an already-mounted filesystem when creating a new bind-mount.
            //
            // Even in the odd case of mounting a new filesystem, creating a new bind mount
            // from a source Inode within the same filesystem and then removing the original mountpoint
            // we should still maintain a mounted_count > 1 if somehow new bind mounts from the filesystem inodes
            // appear.
            VERIFY(do_bind_mount != DoBindMount::Yes);

            FileSystem::all_file_systems_list().with([&](auto& fs_list) {
                fs_list.append(new_mount->guest_fs());
            });
        }
    });

    // NOTE: Leak the mount pointer so it can be added to the mount list, but it won't be
    // deleted after being added.
    mounts_list.append(*new_mount.leak_ptr());
}

// Create a fresh VFSRootContext whose root ("/") is a newly-created, empty
// RAMFS, registered with a single root mount. The context is appended to the
// global root-contexts list before being returned so it is usable immediately.
ErrorOr<NonnullRefPtr<VFSRootContext>> VFSRootContext::create_with_empty_ramfs()
{
    auto fs = TRY(RAMFS::try_create({}));
    TRY(fs->initialize());
    // NOTE: The root custody has no parent and an empty name - it is "/".
    auto root_custody = TRY(Custody::try_create(nullptr, ""sv, fs->root_inode(), 0));
    auto context = TRY(adopt_nonnull_ref_or_enomem(new (nothrow) VFSRootContext(root_custody)));
    auto new_mount = TRY(adopt_nonnull_own_or_enomem(new (nothrow) Mount(fs->root_inode(), 0)));
    TRY(context->m_details.with([&](auto& details) -> ErrorOr<void> {
        dbgln("VFSRootContext({}): Root (\"/\") FileSystemID {}, Mounting {} at inode {} with flags {}",
            context->id(),
            fs->fsid(),
            fs->class_name(),
            root_custody->inode().identifier(),
            0);
        add_to_mounts_list_and_increment_fs_mounted_count(DoBindMount::No, details.mounts, move(new_mount));
        return {};
    }));

    // Finally, add the context to the global list so it can be used.
    VFSRootContext::all_root_contexts_list().with([&](auto& list) {
        list.append(*context);
    });
    return context;
}

// Replace this context's single root mount with `new_mount` (pivot_root-style):
// the old root mount is removed, the new mount becomes the root, and the root
// custody is swapped to `root_mount_point`. Fails with EPERM if any mount
// besides the root mount still exists.
ErrorOr<void> VFSRootContext::pivot_root(FileBackedFileSystem::List& file_backed_file_systems_list, FileSystem& fs, NonnullOwnPtr<Mount> new_mount, NonnullRefPtr<Custody> root_mount_point, int root_mount_flags)
{
    return m_details.with([&](auto& details) -> ErrorOr<void> {
        return fs.mounted_count().with([&](auto& mounted_count) -> ErrorOr<void> {
            // NOTE: If the mounted count is 0, then this filesystem is about to be
            // deleted, so this must be a kernel bug as we don't include such filesystem
            // in the VirtualFileSystem s_details->file_backed_file_systems_list list anymore.
            VERIFY(mounted_count > 0);

            // NOTE: The mounts table should not be empty as it always need
            // to have at least one mount!
            VERIFY(!details.mounts.is_empty());

            // NOTE: If we have many mounts in the table, then simply don't allow
            // userspace to override them but instead require to unmount everything except
            // the root mount first.
            if (details.mounts.size_slow() != 1)
                return EPERM;

            auto& mount = *details.mounts.first();
            TRY(VirtualFileSystem::remove_mount(mount, file_backed_file_systems_list));
            VERIFY(details.mounts.is_empty());

            dbgln("VFSRootContext({}): Root mount set to FileSystemID {}, Mounting {} at inode {} with flags {}",
                id(),
                new_mount->guest_fs().fsid(),
                new_mount->guest_fs().class_name(),
                root_mount_point->inode().identifier(),
                root_mount_flags);

            // NOTE: Leak the mount pointer so it can be added to the mount list, but it won't be
            // deleted after being added.
            details.mounts.append(*new_mount.leak_ptr());

            // NOTE: We essentially do the same thing like VFSRootContext::add_to_mounts_list_and_increment_fs_mounted_count function
            // but because we already locked the spinlock of the attach count, we can't call that function here.
            mounted_count++;
            // NOTE: Now fill the root custody with a valid custody for the new root mount.
            m_root_custody.with([&root_mount_point](auto& custody) {
                custody = move(root_mount_point);
            });
            return {};
        });
    });
}

// Tear down every mount in this context (used during power-state switch).
// The root custody is first swapped to the kernel-processes empty-context
// custody so the context stays in a usable state while mounts disappear.
// Unmounting proceeds in rounds: each round snapshots the current mounts and
// tries to unmount them innermost-last-first; we keep going as long as at
// least one unmount in a round succeeded (nested mounts can only be removed
// after the mounts on top of them are gone).
ErrorOr<void> VFSRootContext::do_full_teardown(Badge<PowerStateSwitchTask>)
{
    // NOTE: We are going to tear down the entire VFS root context from its mounts.
    // To do this properly, we swap out the original root custody with the empty
    // root custody for vfs root context of kernel processes.
    m_root_custody.with([](auto& custody) {
        custody = VFSRootContext::empty_context_custody_for_kernel_processes();
    });

    auto unmount_was_successful = true;
    while (unmount_was_successful) {
        unmount_was_successful = false;
        Vector<Mount&, 16> mounts;
        // Snapshot the mount list under the lock; the actual unmount calls below
        // must happen without holding m_details.
        TRY(m_details.with([&mounts](auto& details) -> ErrorOr<void> {
            for (auto& mount : details.mounts) {
                TRY(mounts.try_append(const_cast<Mount&>(mount)));
            }
            return {};
        }));
        if (mounts.is_empty())
            break;
        auto const remaining_mounts = mounts.size();

        while (!mounts.is_empty()) {
            auto& mount = mounts.take_last();
            TRY(mount.guest_fs().flush_writes());

            auto mount_path = TRY(mount.absolute_path());
            auto& mount_inode = mount.guest();
            auto const result = VirtualFileSystem::unmount(*this, mount_inode, mount_path->view());
            if (result.is_error()) {
                dbgln("Error during unmount of {}: {}", mount_path, result.error());
                // FIXME: For unknown reasons the root FS stays busy even after everything else has shut down and was unmounted.
                //        Until we find the underlying issue, allow an unclean shutdown here.
                if (remaining_mounts <= 1)
                    dbgln("BUG! One mount remaining; the root file system may not be unmountable at all. Shutting down anyways.");
            } else {
                unmount_was_successful = true;
            }
        }
    }
    return {};
}

// Remove the mount whose guest inode is `guest_inode` AND whose mount point's
// absolute path equals `custody_path`. Returns ENODEV when no such mount
// exists. If the removed mount was the last one, the now-invalid context is
// also detached from the global root-contexts list.
ErrorOr<void> VFSRootContext::unmount(FileBackedFileSystem::List& file_backed_file_systems_list, Inode& guest_inode, StringView custody_path)
{
    return m_details.with([&](auto& details) -> ErrorOr<void> {
        bool did_unmount = false;
        for (auto& mount : details.mounts) {
            // Both the guest inode and the mount point path must match.
            if (&mount.guest() != &guest_inode)
                continue;
            auto mountpoint_path = TRY(mount.absolute_path());
            if (custody_path != mountpoint_path->view())
                continue;

            // Refuse to unmount a mount marked immutable while it is in use.
            TRY(VFSRootContext::validate_mount_not_immutable_while_being_used(details, mount));
            dbgln("VFSRootContext({}): Unmounting {}...", id(), custody_path);
            TRY(VirtualFileSystem::remove_mount(mount, file_backed_file_systems_list));
            did_unmount = true;
            break;
        }
        if (!did_unmount) {
            dbgln("VFSRootContext: Nothing mounted on inode {}", guest_inode.identifier());
            return ENODEV;
        }

        // NOTE: The VFSRootContext mount table is not empty and we
        // successfully deleted the desired mount from it, so return
        // a success now.
        if (!details.mounts.is_empty())
            return {};

        // NOTE: If the mount table is empty, then the VFSRootContext
        // is no longer in valid state (each VFSRootContext at least should
        // have a root mount), so remove it now.
        VFSRootContext::all_root_contexts_list().with([this](auto& list) {
            dbgln("VFSRootContext: Nothing mounted in VFSRootContext({}), removing it", id());
            list.remove(*this);
        });
        return {};
    });
}

// Called when a Process detaches from this context: decrement the attach
// count, verifying that a process had attached before and that the count
// cannot underflow.
void VFSRootContext::detach(Badge<Process>)
{
    m_details.with([](auto& state) {
        VERIFY(state.attached_by_process.was_set());
        VERIFY(state.attach_count > 0);
        --state.attach_count;
    });
}

// Called when a Process attaches to this context: record that at least one
// process ever attached (one-way flag) and increment the attach count.
void VFSRootContext::attach(Badge<Process>)
{
    m_details.with([](auto& state) {
        state.attached_by_process.set();
        ++state.attach_count;
    });
}

// Returns true if some existing mount's host custody resolves to the same
// absolute path hierarchy as `mount_point` (i.e. it is already a mount point).
bool VFSRootContext::mount_point_exists_at_custody(Custody& mount_point, Details& details)
{
    for (auto const& existing_mount : details.mounts) {
        if (!existing_mount.host_custody())
            continue;
        if (VirtualFileSystem::check_matching_absolute_path_hierarchy(*existing_mount.host_custody(), mount_point))
            return true;
    }
    return false;
}

// Find the mount matching `current_custody` and hand it to `callback`:
// a custody without a parent selects the root mount (the one without a host
// custody), otherwise we look for a mount whose host custody shares the same
// absolute path hierarchy. With ValidateImmutableFlag::Yes the mount is first
// checked for being immutable-while-in-use. Returns ENODEV when no matching
// mount exists.
ErrorOr<void> VFSRootContext::do_on_mount_for_host_custody(ValidateImmutableFlag validate_immutable_flag, Custody const& current_custody, Function<void(Mount&)> callback) const
{
    VERIFY(validate_immutable_flag == ValidateImmutableFlag::Yes || validate_immutable_flag == ValidateImmutableFlag::No);
    return m_details.with([&](auto& details) -> ErrorOr<void> {
        // NOTE: We either search for the root mount or for a mount that has a parent custody!
        if (!current_custody.parent()) {
            for (auto& mount : details.mounts) {
                if (!mount.host_custody()) {
                    if (validate_immutable_flag == ValidateImmutableFlag::Yes)
                        TRY(VFSRootContext::validate_mount_not_immutable_while_being_used(details, mount));

                    callback(mount);
                    return {};
                }
            }
            // NOTE: There must be a root mount entry, so fail if we don't find it.
            VERIFY_NOT_REACHED();
        } else {
            for (auto& mount : details.mounts) {
                if (mount.host_custody() && VirtualFileSystem::check_matching_absolute_path_hierarchy(*mount.host_custody(), current_custody)) {
                    if (validate_immutable_flag == ValidateImmutableFlag::Yes)
                        TRY(VFSRootContext::validate_mount_not_immutable_while_being_used(details, mount));

                    callback(mount);
                    return {};
                }
            }
        }
        return Error::from_errno(ENODEV);
    });
}

// Mutating variant of do_on_mount_for_host_custody(): ValidateImmutableFlag::Yes
// makes it fail if the matched mount is immutable while being used.
ErrorOr<void> VFSRootContext::apply_to_mount_for_host_custody(Custody const& current_custody, Function<void(Mount&)> callback)
{
    return do_on_mount_for_host_custody(ValidateImmutableFlag::Yes, current_custody, move(callback));
}

// Snapshot the guest filesystem, guest inode and mount flags of the mount that
// matches `current_custody`. This is a read-only query, so no immutability
// validation is done. Propagates ENODEV from do_on_mount_for_host_custody()
// when no matching mount exists.
ErrorOr<VFSRootContext::CurrentMountState> VFSRootContext::current_mount_state_for_host_custody(Custody const& current_custody) const
{
    RefPtr<FileSystem> guest_fs;
    RefPtr<Inode> guest;
    int mount_flags_for_child { 0 };
    TRY(do_on_mount_for_host_custody(ValidateImmutableFlag::No, current_custody, [&guest, &guest_fs, &mount_flags_for_child](auto const& mount) {
        guest = mount.guest();
        guest_fs = mount.guest_fs();
        mount_flags_for_child = mount.flags();
    }));

    // NOTE: release_nonnull() is safe here - the callback only runs when a
    // mount was found, and TRY() already returned on the error path.
    return VFSRootContext::CurrentMountState {
        Mount::Details { guest_fs.release_nonnull(), guest.release_nonnull() },
        mount_flags_for_child
    };
}

// Mount `source` on `mount_point` with `flags`. For a regular mount
// (DoBindMount::No), `source` must be the root inode of its filesystem; a bind
// mount may target any inode. Fails with EBUSY if the mount point is already
// covered by an existing mount. Must not be used for a context's first (root)
// mount - the mount table is VERIFYed to be non-empty.
ErrorOr<void> VFSRootContext::add_new_mount(DoBindMount do_bind_mount, Inode& source, Custody& mount_point, int flags)
{
    // Allocate the Mount up front so no allocation happens under the details lock.
    auto new_mount = TRY(adopt_nonnull_own_or_enomem(new (nothrow) Mount(source, mount_point, flags)));
    return m_details.with([&](auto& details) -> ErrorOr<void> {
        // NOTE: The VFSRootContext should be attached to the list if there's
        // at least one mount in the mount table.
        // We also should have at least one mount in the table because
        // this method shouldn't be called for new contexts when adding
        // their root mounts.
        VERIFY(!details.mounts.is_empty());
        VFSRootContext::all_root_contexts_list().with([&](auto& list) {
            VERIFY(list.contains(*this));
        });

        VERIFY(&new_mount->guest_fs() == &source.fs());
        if (do_bind_mount == DoBindMount::No) {
            VERIFY(&source == &source.fs().root_inode());
            dbgln("VFSRootContext({}): FileSystemID {}, Mounting {} at inode {} with flags {}",
                id(),
                source.fs().fsid(),
                source.fs().class_name(),
                mount_point.inode().identifier(),
                flags);
        } else {
            dbgln("VFSRootContext({}): Bind-mounting inode {} at inode {}", id(), source.identifier(), mount_point.inode().identifier());
        }

        if (mount_point_exists_at_custody(mount_point, details)) {
            dbgln("VFSRootContext({}): Mounting unsuccessful - inode {} is already a mount-point.", id(), mount_point.inode().identifier());
            return EBUSY;
        }
        add_to_mounts_list_and_increment_fs_mounted_count(do_bind_mount, details.mounts, move(new_mount));
        return {};
    });
}

}