Newer
Older
minerva / Kernel / FileSystem / Ext2FS / Inode.cpp
@minerva minerva on 13 Jul 42 KB Initial commit
/*
 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
 * Copyright (c) 2021, sin-ack <sin-ack@protonmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/IntegralMath.h>
#include <AK/IterationDecision.h>
#include <AK/MemoryStream.h>
#include <Kernel/API/POSIX/errno.h>
#include <Kernel/Debug.h>
#include <Kernel/FileSystem/Ext2FS/FileSystem.h>
#include <Kernel/FileSystem/Ext2FS/Inode.h>
#include <Kernel/FileSystem/InodeMetadata.h>
#include <Kernel/UnixTypes.h>

namespace Kernel {

static constexpr size_t max_inline_symlink_length = 60;

u8 Ext2FSInode::to_ext2_file_type(mode_t mode)
{
    if (Kernel::is_regular_file(mode))
        return EXT2_FT_REG_FILE;
    if (Kernel::is_directory(mode))
        return EXT2_FT_DIR;
    if (Kernel::is_character_device(mode))
        return EXT2_FT_CHRDEV;
    if (Kernel::is_block_device(mode))
        return EXT2_FT_BLKDEV;
    if (Kernel::is_fifo(mode))
        return EXT2_FT_FIFO;
    if (Kernel::is_socket(mode))
        return EXT2_FT_SOCK;
    if (Kernel::is_symlink(mode))
        return EXT2_FT_SYMLINK;
    return EXT2_FT_UNKNOWN;
}

bool Ext2FSInode::is_within_inode_bounds(FlatPtr base, FlatPtr value_offset, size_t value_size) const
{
    if (value_offset - base - value_size < EXT2_GOOD_OLD_INODE_SIZE)
        return true;

    VERIFY(static_cast<u64>(EXT2_GOOD_OLD_INODE_SIZE + m_raw_inode.i_extra_isize) <= fs().inode_size());

    return value_offset - base + value_size <= static_cast<u64>(EXT2_GOOD_OLD_INODE_SIZE + m_raw_inode.i_extra_isize);
}

ErrorOr<void> Ext2FSInode::write_singly_indirect_block_pointer(BlockBasedFileSystem::BlockIndex logical_block_index, BlockBasedFileSystem::BlockIndex on_disk_index)
{
    auto const entries_per_block = EXT2_ADDR_PER_BLOCK(&fs().super_block());
    auto const block_size = fs().logical_block_size();

    auto offset_in_block = logical_block_index.value() - EXT2_IND_BLOCK;

    auto singly_indirect_block_storage = TRY(ByteBuffer::create_zeroed(block_size));
    auto singly_indirect_block_contents = Span<u32> { bit_cast<u32*>(singly_indirect_block_storage.data()), entries_per_block };
    auto singly_indirect_block_buffer = UserOrKernelBuffer::for_kernel_buffer(singly_indirect_block_storage.data());

    if (m_raw_inode.i_block[EXT2_IND_BLOCK] == 0) [[unlikely]] {
        m_raw_inode.i_block[EXT2_IND_BLOCK] = TRY(allocate_and_zero_block());
        set_metadata_dirty(true);
    }

    TRY(fs().read_block(m_raw_inode.i_block[EXT2_IND_BLOCK], &singly_indirect_block_buffer, block_size, 0));

    singly_indirect_block_contents[offset_in_block] = on_disk_index.value();
    TRY(fs().write_block(m_raw_inode.i_block[EXT2_IND_BLOCK], singly_indirect_block_buffer, block_size));

    if (on_disk_index != 0)
        return {};

    if (!singly_indirect_block_contents.filled_with(0))
        return {};

    TRY(fs().set_block_allocation_state(m_raw_inode.i_block[EXT2_IND_BLOCK], false));
    m_raw_inode.i_block[EXT2_IND_BLOCK] = 0;
    m_raw_inode.i_blocks -= fs().i_blocks_increment();
    set_metadata_dirty(true);

    return {};
}

ErrorOr<void> Ext2FSInode::write_doubly_indirect_block_pointer(BlockBasedFileSystem::BlockIndex logical_block_index, BlockBasedFileSystem::BlockIndex on_disk_index)
{
    auto const entries_per_block = EXT2_ADDR_PER_BLOCK(&fs().super_block());
    auto const block_size = fs().logical_block_size();

    auto const offset = logical_block_index.value() - singly_indirect_block_capacity();
    auto const offset_in_doubly_indirect_block = offset / entries_per_block;
    auto const offset_in_singly_indirect_block = offset % entries_per_block;

    auto doubly_indirect_block_storage = TRY(ByteBuffer::create_zeroed(block_size));
    auto doubly_indirect_block_contents = Span<u32> { bit_cast<u32*>(doubly_indirect_block_storage.data()), entries_per_block };
    auto doubly_indirect_block_buffer = UserOrKernelBuffer::for_kernel_buffer(doubly_indirect_block_storage.data());

    auto singly_indirect_block_storage = TRY(ByteBuffer::create_zeroed(block_size));
    auto singly_indirect_block_contents = Span<u32> { bit_cast<u32*>(singly_indirect_block_storage.data()), entries_per_block };
    auto singly_indirect_block_buffer = UserOrKernelBuffer::for_kernel_buffer(singly_indirect_block_storage.data());

    if (m_raw_inode.i_block[EXT2_DIND_BLOCK] == 0) [[unlikely]] {
        m_raw_inode.i_block[EXT2_DIND_BLOCK] = TRY(allocate_and_zero_block());
        set_metadata_dirty(true);
    }

    TRY(fs().read_block(m_raw_inode.i_block[EXT2_DIND_BLOCK], &doubly_indirect_block_buffer, block_size, 0));

    if (doubly_indirect_block_contents[offset_in_doubly_indirect_block] == 0) [[unlikely]] {
        doubly_indirect_block_contents[offset_in_doubly_indirect_block] = TRY(allocate_and_zero_block());
        TRY(fs().write_block(m_raw_inode.i_block[EXT2_DIND_BLOCK], doubly_indirect_block_buffer, block_size));
    }

    TRY(fs().read_block(doubly_indirect_block_contents[offset_in_doubly_indirect_block], &singly_indirect_block_buffer, block_size, 0));

    singly_indirect_block_contents[offset_in_singly_indirect_block] = on_disk_index.value();
    TRY(fs().write_block(doubly_indirect_block_contents[offset_in_doubly_indirect_block], singly_indirect_block_buffer, block_size));

    if (on_disk_index != 0)
        return {};

    if (!singly_indirect_block_contents.filled_with(0))
        return {};

    TRY(fs().set_block_allocation_state(doubly_indirect_block_contents[offset_in_doubly_indirect_block], false));
    doubly_indirect_block_contents[offset_in_doubly_indirect_block] = 0;
    m_raw_inode.i_blocks -= fs().i_blocks_increment();
    TRY(fs().write_block(m_raw_inode.i_block[EXT2_DIND_BLOCK], doubly_indirect_block_buffer, block_size));

    if (!doubly_indirect_block_contents.filled_with(0))
        return {};

    TRY(fs().set_block_allocation_state(m_raw_inode.i_block[EXT2_DIND_BLOCK], false));
    m_raw_inode.i_block[EXT2_DIND_BLOCK] = 0;
    m_raw_inode.i_blocks -= fs().i_blocks_increment();
    set_metadata_dirty(true);

    return {};
}

ErrorOr<void> Ext2FSInode::write_triply_indirect_block_pointer(BlockBasedFileSystem::BlockIndex logical_block_index, BlockBasedFileSystem::BlockIndex on_disk_index)
{
    auto const entries_per_block = EXT2_ADDR_PER_BLOCK(&fs().super_block());
    auto const block_size = fs().logical_block_size();

    auto const offset = logical_block_index.value() - doubly_indirect_block_capacity();
    auto const offset_in_triply_indirect_block = offset / (entries_per_block * entries_per_block);
    auto const skipped_blocks = entries_per_block * entries_per_block * offset_in_triply_indirect_block;
    auto const offset_in_doubly_indirect_block = (offset - skipped_blocks) / entries_per_block;
    auto const offset_in_singly_indirect_block = offset % entries_per_block;

    auto triply_indirect_block_storage = TRY(ByteBuffer::create_zeroed(block_size));
    auto triply_indirect_block_contents = Span<u32> { bit_cast<u32*>(triply_indirect_block_storage.data()), entries_per_block };
    auto triply_indirect_block_buffer = UserOrKernelBuffer::for_kernel_buffer(triply_indirect_block_storage.data());

    auto doubly_indirect_block_storage = TRY(ByteBuffer::create_zeroed(block_size));
    auto doubly_indirect_block_contents = Span<u32> { bit_cast<u32*>(doubly_indirect_block_storage.data()), entries_per_block };
    auto doubly_indirect_block_buffer = UserOrKernelBuffer::for_kernel_buffer(doubly_indirect_block_storage.data());

    auto singly_indirect_block_storage = TRY(ByteBuffer::create_zeroed(block_size));
    auto singly_indirect_block_contents = Span<u32> { bit_cast<u32*>(singly_indirect_block_storage.data()), entries_per_block };
    auto singly_indirect_block_buffer = UserOrKernelBuffer::for_kernel_buffer(singly_indirect_block_storage.data());

    if (m_raw_inode.i_block[EXT2_TIND_BLOCK] == 0) [[unlikely]] {
        m_raw_inode.i_block[EXT2_TIND_BLOCK] = TRY(allocate_and_zero_block());
        set_metadata_dirty(true);
    }

    TRY(fs().read_block(m_raw_inode.i_block[EXT2_TIND_BLOCK], &triply_indirect_block_buffer, block_size, 0));

    if (triply_indirect_block_contents[offset_in_triply_indirect_block] == 0) [[unlikely]] {
        triply_indirect_block_contents[offset_in_triply_indirect_block] = TRY(allocate_and_zero_block());
        TRY(fs().write_block(m_raw_inode.i_block[EXT2_TIND_BLOCK], triply_indirect_block_buffer, block_size));
    }

    TRY(fs().read_block(triply_indirect_block_contents[offset_in_triply_indirect_block], &doubly_indirect_block_buffer, block_size, 0));

    if (doubly_indirect_block_contents[offset_in_doubly_indirect_block] == 0) [[unlikely]] {
        doubly_indirect_block_contents[offset_in_doubly_indirect_block] = TRY(allocate_and_zero_block());
        TRY(fs().write_block(triply_indirect_block_contents[offset_in_triply_indirect_block], doubly_indirect_block_buffer, block_size));
    }

    TRY(fs().read_block(doubly_indirect_block_contents[offset_in_doubly_indirect_block], &singly_indirect_block_buffer, block_size, 0));

    singly_indirect_block_contents[offset_in_singly_indirect_block] = on_disk_index.value();
    TRY(fs().write_block(doubly_indirect_block_contents[offset_in_doubly_indirect_block], singly_indirect_block_buffer, block_size));

    if (on_disk_index != 0)
        return {};

    if (!singly_indirect_block_contents.filled_with(0))
        return {};

    TRY(fs().set_block_allocation_state(doubly_indirect_block_contents[offset_in_doubly_indirect_block], false));
    doubly_indirect_block_contents[offset_in_doubly_indirect_block] = 0;
    m_raw_inode.i_blocks -= fs().i_blocks_increment();
    TRY(fs().write_block(triply_indirect_block_contents[offset_in_triply_indirect_block], doubly_indirect_block_buffer, block_size));

    if (!doubly_indirect_block_contents.filled_with(0))
        return {};

    TRY(fs().set_block_allocation_state(triply_indirect_block_contents[offset_in_triply_indirect_block], false));
    triply_indirect_block_contents[offset_in_triply_indirect_block] = 0;
    m_raw_inode.i_blocks -= fs().i_blocks_increment();
    TRY(fs().write_block(m_raw_inode.i_block[EXT2_TIND_BLOCK], triply_indirect_block_buffer, block_size));

    if (!triply_indirect_block_contents.filled_with(0))
        return {};

    TRY(fs().set_block_allocation_state(m_raw_inode.i_block[EXT2_TIND_BLOCK], false));
    m_raw_inode.i_block[EXT2_TIND_BLOCK] = 0;
    m_raw_inode.i_blocks -= fs().i_blocks_increment();
    set_metadata_dirty(true);

    return {};
}

ErrorOr<u32> Ext2FSInode::allocate_and_zero_block()
{
    auto const block_size = fs().logical_block_size();

    auto blocks = TRY(fs().allocate_blocks(fs().group_index_from_inode(index()), 1));
    m_raw_inode.i_blocks += fs().i_blocks_increment();
    auto block = blocks.first();

    auto buffer_content = TRY(ByteBuffer::create_zeroed(block_size));
    TRY(fs().write_block(block, UserOrKernelBuffer::for_kernel_buffer(buffer_content.data()), block_size));
    return block.value();
}

ErrorOr<void> Ext2FSInode::write_block_pointer(BlockBasedFileSystem::BlockIndex logical_block_index, BlockBasedFileSystem::BlockIndex on_disk_index)
{
    VERIFY(m_inode_lock.is_locked());

    if (logical_block_index < EXT2_NDIR_BLOCKS) {
        if (m_raw_inode.i_block[logical_block_index.value()] != on_disk_index) {
            m_raw_inode.i_block[logical_block_index.value()] = on_disk_index.value();
            set_metadata_dirty(true);
        }
        return {};
    }

    if (logical_block_index < singly_indirect_block_capacity())
        return write_singly_indirect_block_pointer(logical_block_index, on_disk_index);

    if (logical_block_index < doubly_indirect_block_capacity())
        return write_doubly_indirect_block_pointer(logical_block_index, on_disk_index);

    if (logical_block_index < triply_indirect_block_capacity())
        return write_triply_indirect_block_pointer(logical_block_index, on_disk_index);

    VERIFY_NOT_REACHED();
}

ErrorOr<Ext2FS::BlockList> Ext2FSInode::compute_block_list(BlockBasedFileSystem::BlockIndex first_block, BlockBasedFileSystem::BlockIndex last_block) const
{
    dbgln_if(EXT2_DEBUG, "Ext2FSInode[{}]::block_list_for_inode(): i_size={}, i_blocks={}", identifier(), m_raw_inode.i_size, m_raw_inode.i_blocks);
    Ext2FS::BlockList list {};

    // If we are handling a symbolic link, the path is stored in the 60 bytes in
    // the inode that are used for the 12 direct and 3 indirect block pointers,
    // If the path is longer than 60 characters, a block is allocated, and the
    // block contains the destination path. The file size corresponds to the
    // path length of the destination.
    if (Kernel::is_symlink(m_raw_inode.i_mode) && m_raw_inode.i_blocks == 0)
        return list;

    unsigned const block_size = fs().logical_block_size();
    unsigned const entries_per_block = EXT2_ADDR_PER_BLOCK(&fs().super_block());

    auto set_block = [&](auto logical_index, auto on_disk_index) -> ErrorOr<IterationDecision> {
        if (logical_index < first_block)
            return IterationDecision::Continue;
        if (logical_index > last_block)
            return IterationDecision::Break;

        TRY(list.try_set(logical_index, on_disk_index));
        return IterationDecision::Continue;
    };

    auto process_block_array = [&](auto current_logical_index, unsigned level, auto array_block_index, ByteBuffer& array_storage, auto&& callback) -> ErrorOr<IterationDecision> {
        TRY(array_storage.try_resize(block_size));
        auto* array = (u32*)array_storage.data();
        auto buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)array);
        TRY(fs().read_block(array_block_index, &buffer, block_size, 0));
        for (unsigned i = 0; i < block_size / sizeof(u32); ++i) {
            if (array[i] != 0) {
                if (TRY(callback(current_logical_index + i * AK::pow(entries_per_block, level - 1), array[i])) == IterationDecision::Break)
                    return IterationDecision::Break;
            }
        }
        return IterationDecision::Continue;
    };

    if (first_block < EXT2_NDIR_BLOCKS) {
        for (size_t i = 0; i < EXT2_NDIR_BLOCKS; ++i) {
            if (m_raw_inode.i_block[i] != 0)
                TRY(set_block(i, m_raw_inode.i_block[i]));
        }
    }

    ByteBuffer block_storage[3] = {};

    if (first_block < singly_indirect_block_capacity() && m_raw_inode.i_block[EXT2_IND_BLOCK]) {
        TRY(process_block_array(EXT2_NDIR_BLOCKS, 1, m_raw_inode.i_block[EXT2_IND_BLOCK], block_storage[0], [&](auto logical_block_index, auto on_disk_index) -> ErrorOr<IterationDecision> {
            return set_block(logical_block_index, on_disk_index);
        }));
    }

    if (first_block < doubly_indirect_block_capacity() && m_raw_inode.i_block[EXT2_DIND_BLOCK]) {
        TRY(process_block_array(singly_indirect_block_capacity(), 2, m_raw_inode.i_block[EXT2_DIND_BLOCK], block_storage[1], [&](auto logical_block_index, auto on_disk_index) -> ErrorOr<IterationDecision> {
            return process_block_array(logical_block_index, 1, on_disk_index, block_storage[0], [&](auto logical_block_index2, auto on_disk_index2) -> ErrorOr<IterationDecision> {
                return set_block(logical_block_index2, on_disk_index2);
            });
        }));
    }

    if (first_block < triply_indirect_block_capacity() && m_raw_inode.i_block[EXT2_TIND_BLOCK]) {
        TRY(process_block_array(doubly_indirect_block_capacity(), 3, m_raw_inode.i_block[EXT2_TIND_BLOCK], block_storage[2], [&](auto logical_block_index, auto on_disk_index) -> ErrorOr<IterationDecision> {
            return process_block_array(logical_block_index, 2, on_disk_index, block_storage[1], [&](auto logical_block_index2, auto on_disk_index2) -> ErrorOr<IterationDecision> {
                return process_block_array(logical_block_index2, 1, on_disk_index2, block_storage[0], [&](auto logical_block_index3, auto on_disk_index3) -> ErrorOr<IterationDecision> {
                    return set_block(logical_block_index3, on_disk_index3);
                });
            });
        }));
    }

    return list;
}

ErrorOr<void> Ext2FSInode::free_all_blocks()
{
    MutexLocker locker(m_inode_lock);

    dbgln_if(EXT2_DEBUG, "Ext2FSInode[{}]::free_all_blocks(): i_size={}, i_blocks={}", identifier(), m_raw_inode.i_size, m_raw_inode.i_blocks);

    if (Kernel::is_symlink(m_raw_inode.i_mode) && m_raw_inode.i_blocks == 0)
        return {};

    unsigned const block_size = fs().logical_block_size();
    unsigned const entries_per_block = EXT2_ADDR_PER_BLOCK(&fs().super_block());

    auto deallocate_block = [&](auto on_disk_block_index) -> ErrorOr<void> {
        TRY(fs().set_block_allocation_state(on_disk_block_index, false));
        m_raw_inode.i_blocks -= fs().i_blocks_increment();
        return {};
    };

    auto process_block_array = [&](auto current_logical_index, unsigned level, auto array_block_index, ByteBuffer& array_storage, auto&& callback) -> ErrorOr<void> {
        TRY(array_storage.try_resize(block_size));

        auto* array = bit_cast<u32*>(array_storage.data());
        auto buffer = UserOrKernelBuffer::for_kernel_buffer(bit_cast<u8*>(array));
        TRY(fs().read_block(array_block_index, &buffer, block_size, 0));

        for (unsigned i = 0; i < block_size / sizeof(u32); ++i) {
            if (array[i] != 0)
                TRY(callback(current_logical_index + i * AK::pow(entries_per_block, level - 1), array[i]));
        }

        return deallocate_block(array_block_index);
    };

    for (size_t i = 0; i < EXT2_NDIR_BLOCKS; ++i) {
        if (m_raw_inode.i_block[i] != 0)
            TRY(deallocate_block(m_raw_inode.i_block[i]));
    }

    ByteBuffer block_storage[3] = {};

    if (m_raw_inode.i_block[EXT2_IND_BLOCK]) {
        TRY(process_block_array(EXT2_NDIR_BLOCKS, 1, m_raw_inode.i_block[EXT2_IND_BLOCK], block_storage[0], [&]([[maybe_unused]] auto logical_block_index, auto on_disk_index) -> ErrorOr<void> {
            return deallocate_block(on_disk_index);
        }));
    }

    if (m_raw_inode.i_block[EXT2_DIND_BLOCK]) {
        TRY(process_block_array(singly_indirect_block_capacity(), 2, m_raw_inode.i_block[EXT2_DIND_BLOCK], block_storage[1], [&](auto logical_block_index, auto on_disk_index) -> ErrorOr<void> {
            return process_block_array(logical_block_index, 1, on_disk_index, block_storage[0], [&]([[maybe_unused]] auto logical_block_index2, auto on_disk_index2) -> ErrorOr<void> {
                return deallocate_block(on_disk_index2);
            });
        }));
    }

    if (m_raw_inode.i_block[EXT2_TIND_BLOCK]) {
        TRY(process_block_array(doubly_indirect_block_capacity(), 3, m_raw_inode.i_block[EXT2_TIND_BLOCK], block_storage[2], [&](auto logical_block_index, auto on_disk_index) -> ErrorOr<void> {
            return process_block_array(logical_block_index, 2, on_disk_index, block_storage[1], [&](auto logical_block_index2, auto on_disk_index2) -> ErrorOr<void> {
                return process_block_array(logical_block_index2, 1, on_disk_index2, block_storage[0], [&]([[maybe_unused]] auto logical_block_index3, auto on_disk_index3) -> ErrorOr<void> {
                    return deallocate_block(on_disk_index3);
                });
            });
        }));
    }

    return {};
}

Ext2FSInode::Ext2FSInode(Ext2FS& fs, InodeIndex index)
    : Inode(fs, index)
    , m_block_view(*this)
{
}

Ext2FSInode::~Ext2FSInode()
{
    if (m_raw_inode.i_links_count == 0) {
        // Alas, we have nowhere to propagate any errors that occur here.
        (void)fs().free_inode(*this);
    }
}

u64 Ext2FSInode::size() const
{
    if (Kernel::is_regular_file(m_raw_inode.i_mode) && ((u32)fs().get_features_readonly() & (u32)Ext2FS::FeaturesReadOnly::FileSize64bits))
        return static_cast<u64>(m_raw_inode.i_dir_acl) << 32 | m_raw_inode.i_size;
    return m_raw_inode.i_size;
}

InodeMetadata Ext2FSInode::metadata() const
{
    MutexLocker locker(m_inode_lock);
    InodeMetadata metadata;
    metadata.inode = identifier();
    metadata.size = size();
    metadata.mode = m_raw_inode.i_mode;
    metadata.uid = inode_uid(m_raw_inode);
    metadata.gid = inode_gid(m_raw_inode);
    metadata.link_count = m_raw_inode.i_links_count;

    auto decode_time = [this](u32 const& time, u32 const& time_extra) {
        // NOTE: All the *_extra fields have to be bounds-checked in case we have oddly-sized inodes.
        // This is simply a correctness measure, since an OOB read wouldn't happen anyway due to the
        // fact that we always store the raw inode as an ext2_inode_large.
        if (is_within_inode_bounds(bit_cast<FlatPtr>(&m_raw_inode), bit_cast<FlatPtr>(&time_extra), sizeof(time_extra))) {
            time_t seconds = decode_seconds_with_extra(time, time_extra);
            u32 nanoseconds = decode_nanoseconds_from_extra(time_extra);
            return UnixDateTime::from_unix_timespec({ .tv_sec = seconds, .tv_nsec = nanoseconds });
        }

        return UnixDateTime::from_seconds_since_epoch(static_cast<i32>(time));
    };

    metadata.atime = decode_time(m_raw_inode.i_atime, m_raw_inode.i_atime_extra);
    metadata.mtime = decode_time(m_raw_inode.i_mtime, m_raw_inode.i_mtime_extra);

    // NOTE: There's no i_dtime_extra, so we use i_ctime_extra to approximate the right epoch for metadata.dtime.
    if (is_within_inode_bounds(bit_cast<FlatPtr>(&m_raw_inode), bit_cast<FlatPtr>(&m_raw_inode.i_ctime_extra), sizeof(m_raw_inode.i_ctime_extra))) {
        time_t ctime_seconds = decode_seconds_with_extra(m_raw_inode.i_ctime, m_raw_inode.i_ctime_extra);
        u32 ctime_nanoseconds = decode_nanoseconds_from_extra(m_raw_inode.i_ctime_extra);

        metadata.ctime = UnixDateTime::from_unix_timespec({ .tv_sec = ctime_seconds, .tv_nsec = ctime_nanoseconds });
        metadata.dtime = UnixDateTime::from_seconds_since_epoch(decode_seconds_with_extra(m_raw_inode.i_dtime, m_raw_inode.i_ctime_extra));
    } else {
        metadata.ctime = UnixDateTime::from_seconds_since_epoch(static_cast<i32>(m_raw_inode.i_ctime));
        metadata.dtime = UnixDateTime::from_seconds_since_epoch(static_cast<i32>(m_raw_inode.i_dtime));
    }

    metadata.block_size = fs().logical_block_size();
    metadata.block_count = m_raw_inode.i_blocks;

    if (Kernel::is_character_device(m_raw_inode.i_mode) || Kernel::is_block_device(m_raw_inode.i_mode)) {
        unsigned dev = m_raw_inode.i_block[0];
        if (!dev)
            dev = m_raw_inode.i_block[1];
        metadata.major_device = (dev & 0xfff00) >> 8;
        metadata.minor_device = (dev & 0xff) | ((dev >> 12) & 0xfff00);
    }
    return metadata;
}

ErrorOr<void> Ext2FSInode::flush_metadata()
{
    MutexLocker locker(m_inode_lock);
    if (!is_metadata_dirty())
        return {};

    dbgln_if(EXT2_DEBUG, "Ext2FSInode[{}]::flush_metadata(): Flushing inode", identifier());
    TRY(fs().write_ext2_inode(index(), m_raw_inode));
    if (is_directory()) {
        // Unless we're about to go away permanently, invalidate the lookup cache.
        if (m_raw_inode.i_links_count != 0) {
            // FIXME: This invalidation is way too hardcore. It's sad to throw away the whole cache.
            m_lookup_cache.clear();
        }
    }
    set_metadata_dirty(false);
    return {};
}

ErrorOr<size_t> Ext2FSInode::read_bytes_locked(off_t offset, size_t count, UserOrKernelBuffer& buffer, OpenFileDescription* description) const
{
    VERIFY(m_inode_lock.is_locked());
    VERIFY(offset >= 0);
    if (m_raw_inode.i_size == 0)
        return 0;

    if (static_cast<u64>(offset) >= size())
        return 0;

    // Symbolic links shorter than 60 characters are store inline inside the i_block array.
    // This avoids wasting an entire block on short links. (Most links are short.)
    if (is_symlink() && size() < max_inline_symlink_length) {
        VERIFY(offset == 0);
        size_t nread = min((off_t)size() - offset, static_cast<off_t>(count));
        TRY(buffer.write(((u8 const*)m_raw_inode.i_block) + offset, nread));
        return nread;
    }

    bool allow_cache = !description || !description->is_direct();

    int const block_size = fs().logical_block_size();

    BlockBasedFileSystem::BlockIndex first_block_logical_index = offset / block_size;

    int offset_into_first_block = offset % block_size;

    size_t nread = 0;
    auto remaining_count = min((off_t)count, (off_t)size() - offset);
    auto current_block_logical_index = first_block_logical_index;

    dbgln_if(EXT2_VERY_DEBUG, "Ext2FSInode[{}]::read_bytes(): Reading up to {} bytes, {} bytes into inode to {}", identifier(), count, offset, buffer.user_or_kernel_ptr());

    while (remaining_count) {
        auto block_index = TRY(m_block_view.get_block(current_block_logical_index));
        size_t offset_into_block = (current_block_logical_index == first_block_logical_index) ? offset_into_first_block : 0;
        size_t num_bytes_to_copy = min((size_t)block_size - offset_into_block, (size_t)remaining_count);
        auto buffer_offset = buffer.offset(nread);
        if (block_index.value() == 0) {
            // This is a hole, act as if it's filled with zeroes.
            TRY(buffer_offset.memset(0, num_bytes_to_copy));
        } else {
            if (auto result = fs().read_block(block_index, &buffer_offset, num_bytes_to_copy, offset_into_block, allow_cache); result.is_error()) {
                dmesgln("Ext2FSInode[{}]::read_bytes(): Failed to read block {} (index {})", identifier(), block_index.value(), current_block_logical_index);
                return result.release_error();
            }
        }
        current_block_logical_index = current_block_logical_index.value() + 1;
        remaining_count -= num_bytes_to_copy;
        nread += num_bytes_to_copy;
    }

    return nread;
}

ErrorOr<void> Ext2FSInode::resize(u64 new_size)
{
    VERIFY(m_inode_lock.is_locked());
    if (size() == new_size)
        return {};

    if (!((u32)fs().get_features_readonly() & (u32)Ext2FS::FeaturesReadOnly::FileSize64bits) && (new_size >= static_cast<u32>(-1)))
        return ENOSPC;

    if (new_size < size()) {
        auto block_size = fs().logical_block_size();
        BlockBasedFileSystem::BlockIndex first_block_logical_index = ceil_div(new_size, block_size);
        BlockBasedFileSystem::BlockIndex last_block_logical_index = size() / block_size;

        for (auto bi = first_block_logical_index; bi <= last_block_logical_index; bi = bi.value() + 1) {
            auto block = TRY(m_block_view.get_block(bi));
            if (block == 0) {
                // This is a hole, skip it.
                continue;
            }
            if (auto result = fs().set_block_allocation_state(block, false); result.is_error()) {
                dbgln("Ext2FSInode[{}]::resize(): Failed to free block {}: {}", identifier(), block, result.error());
                return result;
            }
            m_raw_inode.i_blocks -= fs().i_blocks_increment();
            TRY(m_block_view.write_block_pointer(bi, 0));
        }
    }

    m_raw_inode.i_size = new_size;
    if (Kernel::is_regular_file(m_raw_inode.i_mode))
        m_raw_inode.i_dir_acl = new_size >> 32;

    set_metadata_dirty(true);
    return {};
}

ErrorOr<size_t> Ext2FSInode::write_bytes_locked(off_t offset, size_t count, UserOrKernelBuffer const& data, OpenFileDescription* description)
{
    VERIFY(m_inode_lock.is_locked());
    VERIFY(offset >= 0);

    if (count == 0)
        return 0;

    if (is_symlink()) {
        VERIFY(offset == 0);
        if (max((size_t)(offset + count), (size_t)m_raw_inode.i_size) < max_inline_symlink_length) {
            dbgln_if(EXT2_DEBUG, "Ext2FSInode[{}]::write_bytes_locked(): Poking into i_block array for inline symlink ({} bytes)", identifier(), count);
            TRY(data.read(((u8*)m_raw_inode.i_block) + offset, count));
            if ((size_t)(offset + count) > (size_t)m_raw_inode.i_size)
                m_raw_inode.i_size = offset + count;
            set_metadata_dirty(true);
            return count;
        }
    }

    bool allow_cache = !description || !description->is_direct();

    auto const block_size = fs().logical_block_size();
    auto new_size = max(static_cast<u64>(offset) + count, size());

    TRY(resize(new_size));

    BlockBasedFileSystem::BlockIndex first_block_logical_index = offset / block_size;

    size_t offset_into_first_block = offset % block_size;

    size_t nwritten = 0;
    auto remaining_count = min((off_t)count, (off_t)new_size - offset);
    auto current_block_logical_index = first_block_logical_index;

    dbgln_if(EXT2_VERY_DEBUG, "Ext2FSInode[{}]::write_bytes_locked(): Writing {} bytes, {} bytes into inode from {}", identifier(), count, offset, data.user_or_kernel_ptr());

    while (remaining_count) {
        size_t offset_into_block = (current_block_logical_index == first_block_logical_index) ? offset_into_first_block : 0;
        size_t num_bytes_to_copy = min((size_t)block_size - offset_into_block, (size_t)remaining_count);
        auto block_index = TRY(m_block_view.get_or_allocate_block(current_block_logical_index, num_bytes_to_copy != block_size, allow_cache));
        TRY(m_block_view.write_block_pointer(current_block_logical_index, block_index));

        dbgln_if(EXT2_DEBUG, "Ext2FSInode[{}]::write_bytes_locked(): Writing block {} (offset_into_block: {})", identifier(), block_index, offset_into_block);
        if (auto result = fs().write_block(block_index, data.offset(nwritten), num_bytes_to_copy, offset_into_block, allow_cache); result.is_error()) {
            dbgln("Ext2FSInode[{}]::write_bytes_locked(): Failed to write block {} (index {})", identifier(), block_index, current_block_logical_index);
            return result.release_error();
        }
        current_block_logical_index = current_block_logical_index.value() + 1;
        remaining_count -= num_bytes_to_copy;
        nwritten += num_bytes_to_copy;
    }

    did_modify_contents();

    dbgln_if(EXT2_VERY_DEBUG, "Ext2FSInode[{}]::write_bytes_locked(): After write, i_size={}, i_blocks={}", identifier(), size(), m_raw_inode.i_blocks);
    return nwritten;
}

ErrorOr<void> Ext2FSInode::traverse_as_directory(Function<ErrorOr<void>(FileSystem::DirectoryEntryView const&)> callback) const
{
    MutexLocker locker(m_inode_lock);
    VERIFY(is_directory());

    u8 buffer[max_block_size];
    auto buf = UserOrKernelBuffer::for_kernel_buffer(buffer);

    auto block_size = fs().logical_block_size();
    auto file_size = size();

    bool has_file_type_attribute = has_flag(fs().get_features_optional(), Ext2FS::FeaturesOptional::ExtendedAttributes);

    // Directory entries are guaranteed not to span multiple blocks,
    // so we can iterate over blocks separately.

    for (u64 offset = 0; offset < file_size; offset += block_size) {
        TRY(read_bytes(offset, block_size, buf, nullptr));

        using ext2_extended_dir_entry = ext2_dir_entry_2;
        auto* entry = reinterpret_cast<ext2_extended_dir_entry*>(buffer);
        auto* entries_end = reinterpret_cast<ext2_extended_dir_entry*>(buffer + block_size);
        while (entry < entries_end) {
            if (entry->inode != 0) {
                dbgln_if(EXT2_DEBUG, "Ext2FSInode[{}]::traverse_as_directory(): inode {}, name_len: {}, rec_len: {}, file_type: {}, name: {}", identifier(), entry->inode, entry->name_len, entry->rec_len, entry->file_type, StringView(entry->name, entry->name_len));
                TRY(callback({ { entry->name, entry->name_len }, { fsid(), entry->inode }, has_file_type_attribute ? entry->file_type : (u8)EXT2_FT_UNKNOWN }));
            }
            entry = (ext2_extended_dir_entry*)((char*)entry + entry->rec_len);
        }
    }

    return {};
}

ErrorOr<void> Ext2FSInode::write_directory(Vector<Ext2FSDirectoryEntry>& entries)
{
    MutexLocker locker(m_inode_lock);
    auto block_size = fs().logical_block_size();

    // Calculate directory size and record length of entries so that
    // the following constraints are met:
    // - All used blocks must be entirely filled.
    // - Entries are aligned on a 4-byte boundary.
    // - No entry may span multiple blocks.
    size_t directory_size = 0;
    size_t space_in_block = block_size;
    for (size_t i = 0; i < entries.size(); ++i) {
        auto& entry = entries[i];
        entry.record_length = EXT2_DIR_REC_LEN(entry.name->length());
        space_in_block -= entry.record_length;
        if (i + 1 < entries.size()) {
            if (EXT2_DIR_REC_LEN(entries[i + 1].name->length()) > space_in_block) {
                entry.record_length += space_in_block;
                space_in_block = block_size;
            }
        } else {
            entry.record_length += space_in_block;
        }
        directory_size += entry.record_length;
    }

    dbgln_if(EXT2_DEBUG, "Ext2FSInode[{}]::write_directory(): New directory contents to write (size {}):", identifier(), directory_size);

    auto directory_data = TRY(ByteBuffer::create_uninitialized(directory_size));
    FixedMemoryStream stream { directory_data.bytes() };
    bool has_file_type_attribute = has_flag(fs().get_features_optional(), Ext2FS::FeaturesOptional::ExtendedAttributes);

    for (auto& entry : entries) {
        dbgln_if(EXT2_DEBUG, "Ext2FSInode[{}]::write_directory(): Writing inode: {}, name_len: {}, rec_len: {}, file_type: {}, name: {}", identifier(), entry.inode_index, u16(entry.name->length()), u16(entry.record_length), u8(entry.file_type), entry.name);

        MUST(stream.write_value<u32>(entry.inode_index.value()));
        MUST(stream.write_value<u16>(entry.record_length));
        MUST(stream.write_value<u8>(entry.name->length()));
        MUST(stream.write_value<u8>(has_file_type_attribute ? entry.file_type : EXT2_FT_UNKNOWN));
        MUST(stream.write_until_depleted(entry.name->bytes()));
        int padding = entry.record_length - entry.name->length() - 8;
        for (int j = 0; j < padding; ++j)
            MUST(stream.write_value<u8>(0));
    }

    auto serialized_bytes_count = TRY(stream.tell());
    VERIFY(serialized_bytes_count == directory_size);

    TRY(resize(serialized_bytes_count));

    auto buffer = UserOrKernelBuffer::for_kernel_buffer(directory_data.data());
    auto nwritten = TRY(prepare_and_write_bytes_locked(0, serialized_bytes_count, buffer, nullptr));
    set_metadata_dirty(true);
    if (nwritten != directory_data.size())
        return EIO;
    return {};
}

ErrorOr<BlockBasedFileSystem::BlockIndex> Ext2FSInode::allocate_block(BlockBasedFileSystem::BlockIndex block_index, bool zero_newly_allocated_block, bool allow_cache)
{
    // FIXME: Preallocate some extra blocks here.
    auto blocks = TRY(fs().allocate_blocks(fs().group_index_from_inode(index()), 1));
    m_raw_inode.i_blocks += fs().i_blocks_increment();

    VERIFY(blocks.size() == 1);
    auto block = blocks.first();

    if (zero_newly_allocated_block) {
        u8 zero_buffer[PAGE_SIZE] {};
        if (auto result = fs().write_block(block, UserOrKernelBuffer::for_kernel_buffer(zero_buffer), fs().logical_block_size(), 0, allow_cache); result.is_error()) {
            dbgln("Ext2FSInode[{}]::allocate_block(): Failed to zero block {} (index {})", identifier(), block, block_index);
            return result.release_error();
        }
    }

    return block;
}

ErrorOr<NonnullRefPtr<Inode>> Ext2FSInode::create_child(StringView name, mode_t mode, dev_t dev, UserID uid, GroupID gid)
{
    if (Kernel::is_directory(mode))
        return fs().create_directory(*this, name, mode, uid, gid);
    return fs().create_inode(*this, name, mode, dev, uid, gid);
}

ErrorOr<void> Ext2FSInode::add_child(Inode& child, StringView name, mode_t mode)
{
    MutexLocker locker(m_inode_lock);
    VERIFY(is_directory());

    if (name.length() > EXT2_NAME_LEN)
        return ENAMETOOLONG;

    dbgln_if(EXT2_DEBUG, "Ext2FSInode[{}]::add_child(): Adding inode {} with name '{}' and mode {:o} to directory {}", identifier(), child.index(), name, mode, index());
    bool has_file_type_attribute = has_flag(fs().get_features_optional(), Ext2FS::FeaturesOptional::ExtendedAttributes);

    Vector<Ext2FSDirectoryEntry> entries;
    TRY(traverse_as_directory([&](auto& entry) -> ErrorOr<void> {
        if (name == entry.name)
            return EEXIST;
        auto entry_name = TRY(KString::try_create(entry.name));
        TRY(entries.try_append({ move(entry_name), entry.inode.index(), has_file_type_attribute ? entry.file_type : (u8)EXT2_FT_UNKNOWN }));
        return {};
    }));

    TRY(child.increment_link_count());

    auto entry_name = TRY(KString::try_create(name));
    TRY(entries.try_empend(move(entry_name), child.index(), has_file_type_attribute ? to_ext2_file_type(mode) : (u8)EXT2_FT_UNKNOWN));

    TRY(write_directory(entries));
    TRY(populate_lookup_cache());

    auto cache_entry_name = TRY(KString::try_create(name));
    TRY(m_lookup_cache.try_set(move(cache_entry_name), child.index()));
    did_add_child(child.identifier(), name);
    return {};
}

ErrorOr<void> Ext2FSInode::remove_child_impl(StringView name, RemoveDotEntries remove_dot_entries)
{
    MutexLocker locker(m_inode_lock);
    VERIFY(is_directory());

    TRY(populate_lookup_cache());

    auto it = m_lookup_cache.find(name);
    if (it == m_lookup_cache.end())
        return ENOENT;
    auto child_inode_index = (*it).value;

    InodeIdentifier child_id { fsid(), child_inode_index };
    auto child_inode = TRY(fs().get_inode(child_id));
    if (child_inode->is_directory() && remove_dot_entries == RemoveDotEntries::Yes) {
        TRY(static_cast<Ext2FSInode&>(*child_inode).remove_child_impl("."sv, RemoveDotEntries::No));
        TRY(static_cast<Ext2FSInode&>(*child_inode).remove_child_impl(".."sv, RemoveDotEntries::No));
    }

    bool has_file_type_attribute = has_flag(fs().get_features_optional(), Ext2FS::FeaturesOptional::ExtendedAttributes);

    Vector<Ext2FSDirectoryEntry> entries;
    TRY(traverse_as_directory([&](auto& entry) -> ErrorOr<void> {
        if (name != entry.name) {
            auto entry_name = TRY(KString::try_create(entry.name));
            TRY(entries.try_append({ move(entry_name), entry.inode.index(), has_file_type_attribute ? entry.file_type : (u8)EXT2_FT_UNKNOWN }));
        }
        return {};
    }));

    TRY(write_directory(entries));

    m_lookup_cache.remove(it);

    TRY(child_inode->decrement_link_count());

    did_remove_child(child_id, name);
    return {};
}

ErrorOr<void> Ext2FSInode::remove_child(StringView name)
{
    dbgln_if(EXT2_DEBUG, "Ext2FSInode[{}]::remove_child(): Removing '{}'", identifier(), name);
    // TODO: Implement something like remove_directory so we can get rid of remove_child_impl.
    return remove_child_impl(name, RemoveDotEntries::Yes);
}

ErrorOr<void> Ext2FSInode::populate_lookup_cache()
{
    VERIFY(m_inode_lock.is_exclusively_locked_by_current_thread());
    if (!m_lookup_cache.is_empty())
        return {};
    HashMap<NonnullOwnPtr<KString>, InodeIndex> children;

    TRY(traverse_as_directory([&children](auto& entry) -> ErrorOr<void> {
        auto entry_name = TRY(KString::try_create(entry.name));
        TRY(children.try_set(move(entry_name), entry.inode.index()));
        return {};
    }));

    VERIFY(m_lookup_cache.is_empty());
    m_lookup_cache = move(children);
    return {};
}

ErrorOr<NonnullRefPtr<Inode>> Ext2FSInode::lookup(StringView name)
{
    VERIFY(is_directory());
    dbgln_if(EXT2_DEBUG, "Ext2FSInode[{}]:lookup(): Looking up '{}'", identifier(), name);

    InodeIndex inode_index;
    {
        MutexLocker locker(m_inode_lock);
        TRY(populate_lookup_cache());
        auto it = m_lookup_cache.find(name);
        if (it == m_lookup_cache.end()) {
            dbgln_if(EXT2_DEBUG, "Ext2FSInode[{}]:lookup(): '{}' not found", identifier(), name);
            return ENOENT;
        }
        inode_index = it->value;
    }

    return fs().get_inode({ fsid(), inode_index });
}

ErrorOr<void> Ext2FSInode::update_timestamps(Optional<UnixDateTime> atime, Optional<UnixDateTime> ctime, Optional<UnixDateTime> mtime)
{
    MutexLocker locker(m_inode_lock);
    if (fs().is_readonly())
        return EROFS;
    if (atime.value_or({}).to_timespec().tv_sec > NumericLimits<i32>::max() && !is_within_inode_bounds(bit_cast<FlatPtr>(&m_raw_inode), bit_cast<FlatPtr>(&m_raw_inode.i_atime_extra), sizeof(m_raw_inode.i_atime_extra)))
        return EINVAL;
    if (ctime.value_or({}).to_timespec().tv_sec > NumericLimits<i32>::max() && !is_within_inode_bounds(bit_cast<FlatPtr>(&m_raw_inode), bit_cast<FlatPtr>(&m_raw_inode.i_ctime_extra), sizeof(m_raw_inode.i_ctime_extra)))
        return EINVAL;
    if (mtime.value_or({}).to_timespec().tv_sec > NumericLimits<i32>::max() && !is_within_inode_bounds(bit_cast<FlatPtr>(&m_raw_inode), bit_cast<FlatPtr>(&m_raw_inode.i_mtime_extra), sizeof(m_raw_inode.i_mtime_extra)))
        return EINVAL;

    auto maybe_encode_time = [](auto const& source, u32& time, u32& time_extra) {
        if (!source.has_value())
            return;

        time_t seconds = source.value().to_timespec().tv_sec;
        u32 nanoseconds = source.value().to_timespec().tv_nsec;
        time = static_cast<u32>(seconds);
        time_extra = encode_time_to_extra(seconds, nanoseconds);
    };

    maybe_encode_time(atime, m_raw_inode.i_atime, m_raw_inode.i_atime_extra);
    maybe_encode_time(ctime, m_raw_inode.i_ctime, m_raw_inode.i_ctime_extra);
    maybe_encode_time(mtime, m_raw_inode.i_mtime, m_raw_inode.i_mtime_extra);

    set_metadata_dirty(true);
    return {};
}

ErrorOr<void> Ext2FSInode::increment_link_count()
{
    MutexLocker locker(m_inode_lock);
    if (fs().is_readonly())
        return EROFS;
    constexpr size_t max_link_count = 65535;
    if (m_raw_inode.i_links_count == max_link_count)
        return EMLINK;
    ++m_raw_inode.i_links_count;
    set_metadata_dirty(true);
    return {};
}

ErrorOr<void> Ext2FSInode::decrement_link_count()
{
    MutexLocker locker(m_inode_lock);
    if (fs().is_readonly())
        return EROFS;
    VERIFY(m_raw_inode.i_links_count);

    --m_raw_inode.i_links_count;
    set_metadata_dirty(true);
    if (m_raw_inode.i_links_count == 0)
        did_delete_self();

    if (ref_count() == 1 && m_raw_inode.i_links_count == 0)
        fs().uncache_inode(index());

    return {};
}

ErrorOr<void> Ext2FSInode::chmod(mode_t mode)
{
    MutexLocker locker(m_inode_lock);
    if (m_raw_inode.i_mode == mode)
        return {};
    m_raw_inode.i_mode = mode;
    set_metadata_dirty(true);
    return {};
}

ErrorOr<void> Ext2FSInode::chown(UserID uid, GroupID gid)
{
    MutexLocker locker(m_inode_lock);
    if (inode_uid(m_raw_inode) == uid && inode_gid(m_raw_inode) == gid)
        return {};
    m_raw_inode.i_uid = static_cast<u16>(uid.value());
    ext2fs_set_i_uid_high(m_raw_inode, uid.value() >> 16);
    m_raw_inode.i_gid = static_cast<u16>(gid.value());
    ext2fs_set_i_gid_high(m_raw_inode, gid.value() >> 16);
    set_metadata_dirty(true);
    return {};
}

ErrorOr<void> Ext2FSInode::truncate_locked(u64 size)
{
    VERIFY(m_inode_lock.is_locked());
    if (static_cast<u64>(m_raw_inode.i_size) == size)
        return {};
    TRY(resize(size));
    set_metadata_dirty(true);
    did_modify_contents();
    return {};
}

ErrorOr<int> Ext2FSInode::get_block_address(int index)
{
    MutexLocker locker(m_inode_lock);

    if (index < 0)
        return 0;

    return TRY(m_block_view.get_block(index)).value();
}

}