/* Copyright (C) 2024 Wildfire Games.
 * This file is part of 0 A.D.
 *
 * 0 A.D. is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * 0 A.D. is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
 */

#include "precompiled.h"

#include "RingCommandContext.h"

#include "lib/bits.h"
#include "renderer/backend/vulkan/Buffer.h"
#include "renderer/backend/vulkan/Device.h"
#include "renderer/backend/vulkan/Utilities.h"

#include <algorithm>
#include <cstring>
#include <limits>

namespace Renderer
{

namespace Backend
{

namespace Vulkan
{

namespace
{

constexpr uint32_t INITIAL_STAGING_BUFFER_CAPACITY = 1024 * 1024;
constexpr VkDeviceSize SMALL_HOST_TOTAL_MEMORY_THRESHOLD = 1024 * 1024 * 1024;
constexpr uint32_t MAX_SMALL_STAGING_BUFFER_CAPACITY = 64 * 1024 * 1024;
constexpr uint32_t MAX_STAGING_BUFFER_CAPACITY = 256 * 1024 * 1024;

constexpr uint32_t INVALID_OFFSET = std::numeric_limits<uint32_t>::max();

} // anonymous namespace
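// CRingCommandContext keeps a small ring of command pools, each owning one
// primary command buffer, so that a new buffer can be recorded while
// previously submitted ones are still executing on the GPU. Uploads are
// staged through a single host-visible ring buffer: the occupied byte range
// is tracked by m_StagingBufferFirst/m_StagingBufferLast, and each ring item
// remembers which staging range its submission uses so that the range can be
// reclaimed once the submission is known to be complete.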
CRingCommandContext::CRingCommandContext(
	CDevice* device, const size_t size, const uint32_t queueFamilyIndex,
	CSubmitScheduler& submitScheduler)
	: m_Device(device), m_SubmitScheduler(submitScheduler)
{
	ENSURE(m_Device);

	m_OptimalBufferCopyOffsetAlignment = std::max(
		1u, static_cast<uint32_t>(
			m_Device->GetChoosenPhysicalDevice().properties.limits.optimalBufferCopyOffsetAlignment));
	// If the host has only a small amount of memory, it's better to make
	// uploading slower than to crash due to OOM, because the staging buffer
	// is allocated in host memory.
	m_MaxStagingBufferCapacity =
		m_Device->GetChoosenPhysicalDevice().hostTotalMemory <= SMALL_HOST_TOTAL_MEMORY_THRESHOLD
			? MAX_SMALL_STAGING_BUFFER_CAPACITY
			: MAX_STAGING_BUFFER_CAPACITY;

	m_Ring.resize(size);
	for (RingItem& item : m_Ring)
	{
		VkCommandPoolCreateInfo commandPoolCreateInfo{};
		commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
		commandPoolCreateInfo.queueFamilyIndex = queueFamilyIndex;
		ENSURE_VK_SUCCESS(vkCreateCommandPool(
			m_Device->GetVkDevice(), &commandPoolCreateInfo, nullptr, &item.commandPool));

		VkCommandBufferAllocateInfo allocateInfo{};
		allocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
		allocateInfo.commandPool = item.commandPool;
		allocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
		allocateInfo.commandBufferCount = 1;
		ENSURE_VK_SUCCESS(vkAllocateCommandBuffers(
			m_Device->GetVkDevice(), &allocateInfo, &item.commandBuffer));
		device->SetObjectName(
			VK_OBJECT_TYPE_COMMAND_BUFFER, item.commandBuffer, "RingCommandBuffer");
	}
}

CRingCommandContext::~CRingCommandContext()
{
	VkDevice device = m_Device->GetVkDevice();

	for (RingItem& item : m_Ring)
	{
		if (item.commandBuffer != VK_NULL_HANDLE)
			vkFreeCommandBuffers(device, item.commandPool, 1, &item.commandBuffer);

		if (item.commandPool != VK_NULL_HANDLE)
			vkDestroyCommandPool(device, item.commandPool, nullptr);
	}
}

VkCommandBuffer CRingCommandContext::GetCommandBuffer()
{
	RingItem& item = m_Ring[m_RingIndex];
	if (!item.isBegan)
		Begin();
	return item.commandBuffer;
}

void CRingCommandContext::Flush()
{
	RingItem& item = m_Ring[m_RingIndex];
	if (!item.isBegan)
		return;
	End();
	item.handle = m_SubmitScheduler.Submit(item.commandBuffer);
	m_RingIndex = (m_RingIndex + 1) % m_Ring.size();
}

void CRingCommandContext::FlushAndWait()
{
	RingItem& item = m_Ring[m_RingIndex];
	ENSURE(item.isBegan);
	End();
	item.handle = m_SubmitScheduler.Submit(item.commandBuffer);
	WaitUntilFree(item);
}

void CRingCommandContext::ScheduleUpload(
	CTexture* texture, const Format dataFormat, const void* data,
	const size_t dataSize, const uint32_t level, const uint32_t layer)
{
	const uint32_t minimumSize = 1u;
	const uint32_t width = std::max(minimumSize, texture->GetWidth() >> level);
	const uint32_t height = std::max(minimumSize, texture->GetHeight() >> level);
	ScheduleUpload(
		texture, dataFormat, data, dataSize, 0, 0, width, height, level, layer);
}
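// For example, a 256x64 texture at level 2 yields a 64x16 upload region,
// while at level 7 the raw shifts would give 2x0; the std::max clamp above
// keeps every mip dimension at least 1, as Vulkan requires.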
void CRingCommandContext::ScheduleUpload(
	CTexture* texture, const Format UNUSED(dataFormat), const void* data,
	const size_t dataSize, const uint32_t xOffset, const uint32_t yOffset,
	const uint32_t width, const uint32_t height,
	const uint32_t level, const uint32_t layer)
{
	ENSURE(texture->GetType() != ITexture::Type::TEXTURE_2D_MULTISAMPLE);
	const Format format = texture->GetFormat();
	if (texture->GetType() != ITexture::Type::TEXTURE_CUBE)
		ENSURE(layer == 0);
	ENSURE(format != Format::R8G8B8_UNORM);
	const bool isCompressedFormat =
		format == Format::BC1_RGB_UNORM ||
		format == Format::BC1_RGBA_UNORM ||
		format == Format::BC2_UNORM ||
		format == Format::BC3_UNORM;
	ENSURE(
		format == Format::R8_UNORM || format == Format::R8G8_UNORM ||
		format == Format::R8G8B8A8_UNORM || format == Format::A8_UNORM ||
		format == Format::L8_UNORM || isCompressedFormat);

	// TODO: use a more precise format alignment.
	constexpr uint32_t formatAlignment = 16;
	const uint32_t offset = AcquireFreeSpace(
		dataSize, std::max(formatAlignment, m_OptimalBufferCopyOffsetAlignment));

	std::memcpy(static_cast<u8*>(m_StagingBuffer->GetMappedData()) + offset, data, dataSize);

	VkCommandBuffer commandBuffer = GetCommandBuffer();

	VkImage image = texture->GetImage();

	Utilities::SubmitImageMemoryBarrier(
		commandBuffer, image, level, layer,
		VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
		VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
		VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
		VK_PIPELINE_STAGE_TRANSFER_BIT);

	VkBufferImageCopy region{};
	region.bufferOffset = offset;
	region.bufferRowLength = 0;
	region.bufferImageHeight = 0;
	region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	region.imageSubresource.mipLevel = level;
	region.imageSubresource.baseArrayLayer = layer;
	region.imageSubresource.layerCount = 1;
	region.imageOffset = {static_cast<int32_t>(xOffset), static_cast<int32_t>(yOffset), 0};
	region.imageExtent = {width, height, 1};

	vkCmdCopyBufferToImage(
		commandBuffer, m_StagingBuffer->GetVkBuffer(), image,
		VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);

	VkAccessFlags dstAccessFlags = VK_ACCESS_SHADER_READ_BIT;
	VkPipelineStageFlags dstStageMask =
		VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
	Utilities::SubmitImageMemoryBarrier(
		commandBuffer, image, level, layer,
		VK_ACCESS_TRANSFER_WRITE_BIT, dstAccessFlags,
		VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
		VK_PIPELINE_STAGE_TRANSFER_BIT, dstStageMask);

	texture->SetInitialized();
}

void CRingCommandContext::ScheduleUpload(
	CBuffer* buffer, const void* data, const uint32_t dataOffset,
	const uint32_t dataSize)
{
	constexpr uint32_t alignment = 16;
	const uint32_t offset = AcquireFreeSpace(dataSize, alignment);

	std::memcpy(static_cast<u8*>(m_StagingBuffer->GetMappedData()) + offset, data, dataSize);

	ScheduleUpload(buffer, dataOffset, dataSize, offset);
}

void CRingCommandContext::ScheduleUpload(
	CBuffer* buffer, const uint32_t dataOffset, const uint32_t dataSize,
	const UploadBufferFunction& uploadFunction)
{
	constexpr uint32_t alignment = 16;
	const uint32_t offset = AcquireFreeSpace(dataSize, alignment);

	CBuffer* stagingBuffer = m_StagingBuffer->As<CBuffer>();

	// The callback writes relative to the start of the destination buffer,
	// so bias the mapped pointer: bytes written to [dataOffset,
	// dataOffset + dataSize) land at [offset, offset + dataSize) in the
	// staging buffer.
	uploadFunction(static_cast<u8*>(stagingBuffer->GetMappedData()) + offset - dataOffset);

	ScheduleUpload(buffer, dataOffset, dataSize, offset);
}
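// A sketch of how the callback overload above might be used; the caller
// state (vertexBuffer, vertices, firstVertexOffset, verticesSize) is
// hypothetical. The lambda treats its argument as the start of the
// destination buffer and writes at the same offsets it would use there:
//
//   ringCommandContext.ScheduleUpload(
//       vertexBuffer, firstVertexOffset, verticesSize,
//       [&](u8* mappedData)
//       {
//           // Hypothetical payload; lands in the acquired staging range.
//           std::memcpy(mappedData + firstVertexOffset, vertices, verticesSize);
//       });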
void CRingCommandContext::ScheduleUpload(
	CBuffer* buffer, const uint32_t dataOffset, const uint32_t dataSize,
	const uint32_t acquiredOffset)
{
	CBuffer* stagingBuffer = m_StagingBuffer->As<CBuffer>();
	VkCommandBuffer commandBuffer = GetCommandBuffer();

	VkBufferCopy region{};
	region.srcOffset = acquiredOffset;
	region.dstOffset = dataOffset;
	region.size = dataSize;

	// TODO: remove the transfer mask from the pipeline barrier, as we need
	// to batch copies.
	VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
	VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
	if (buffer->GetType() == IBuffer::Type::VERTEX || buffer->GetType() == IBuffer::Type::INDEX)
		srcStageMask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
	else if (buffer->GetType() == IBuffer::Type::UNIFORM)
		srcStageMask =
			VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
			VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
	Utilities::SubmitPipelineBarrier(commandBuffer, srcStageMask, dstStageMask);

	// TODO: currently we might overwrite data, which triggers a validation
	// assertion about a Write-After-Write hazard.
	if (buffer->IsDynamic())
	{
		Utilities::SubmitBufferMemoryBarrier(
			commandBuffer, buffer, dataOffset, dataSize,
			VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
			VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
	}

	vkCmdCopyBuffer(
		commandBuffer, stagingBuffer->GetVkBuffer(), buffer->GetVkBuffer(), 1, &region);

	VkAccessFlags srcAccessFlags = VK_ACCESS_TRANSFER_WRITE_BIT;
	VkAccessFlags dstAccessFlags = 0;
	srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
	dstStageMask = 0;
	if (buffer->GetType() == IBuffer::Type::VERTEX)
	{
		dstAccessFlags = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
		dstStageMask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
	}
	else if (buffer->GetType() == IBuffer::Type::INDEX)
	{
		dstAccessFlags = VK_ACCESS_INDEX_READ_BIT;
		dstStageMask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
	}
	else if (buffer->GetType() == IBuffer::Type::UNIFORM)
	{
		dstAccessFlags = VK_ACCESS_UNIFORM_READ_BIT;
		dstStageMask =
			VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
			VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
	}
	Utilities::SubmitBufferMemoryBarrier(
		commandBuffer, buffer, dataOffset, dataSize,
		srcAccessFlags, dstAccessFlags, srcStageMask, dstStageMask);
}

void CRingCommandContext::Begin()
{
	RingItem& item = m_Ring[m_RingIndex];
	item.isBegan = true;

	WaitUntilFree(item);

	m_StagingBufferCurrentFirst = m_StagingBufferLast;

	ENSURE_VK_SUCCESS(vkResetCommandPool(m_Device->GetVkDevice(), item.commandPool, 0));

	VkCommandBufferBeginInfo beginInfo{};
	beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
	beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
	beginInfo.pInheritanceInfo = nullptr;
	ENSURE_VK_SUCCESS(vkBeginCommandBuffer(item.commandBuffer, &beginInfo));
}

void CRingCommandContext::End()
{
	RingItem& item = m_Ring[m_RingIndex];
	item.isBegan = false;

	item.stagingBufferFirst = m_StagingBufferCurrentFirst;
	item.stagingBufferLast = m_StagingBufferLast;

	ENSURE_VK_SUCCESS(vkEndCommandBuffer(item.commandBuffer));
}

void CRingCommandContext::WaitUntilFree(RingItem& item)
{
	m_SubmitScheduler.WaitUntilFree(item.handle);
	if (item.stagingBufferFirst != item.stagingBufferLast)
	{
		m_StagingBufferFirst = item.stagingBufferLast;
		item.stagingBufferFirst = 0;
		item.stagingBufferLast = 0;
	}
}
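// AcquireFreeSpace below tries three strategies in order: reuse free space
// in the current staging buffer, replace the buffer with a larger one
// (doubling the capacity, but never below round_up_to_pow2(requiredSize)
// and never above m_MaxStagingBufferCapacity), and finally wait for
// in-flight submissions, flushing the current command buffer as a last
// resort. For example, starting from the 1 MiB initial capacity, a 3 MiB
// request allocates a 4 MiB buffer.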
uint32_t CRingCommandContext::AcquireFreeSpace(
	const uint32_t requiredSize, const uint32_t requiredAlignment)
{
	ENSURE(requiredSize <= m_MaxStagingBufferCapacity);
	const uint32_t offsetCandidate = GetFreeSpaceOffset(requiredSize, requiredAlignment);
	const bool needsResize = !m_StagingBuffer || offsetCandidate == INVALID_OFFSET;
	const bool canResize = !m_StagingBuffer || m_StagingBuffer->GetSize() < m_MaxStagingBufferCapacity;
	if (needsResize && canResize)
	{
		const uint32_t minimumRequiredCapacity = round_up_to_pow2(requiredSize);
		const uint32_t newCapacity = std::min(
			std::max(
				m_StagingBuffer ? m_StagingBuffer->GetSize() * 2 : INITIAL_STAGING_BUFFER_CAPACITY,
				minimumRequiredCapacity),
			m_MaxStagingBufferCapacity);
		m_StagingBuffer = m_Device->CreateCBuffer(
			"UploadRingBuffer", IBuffer::Type::UPLOAD, newCapacity, IBuffer::Usage::TRANSFER_SRC);
		ENSURE(m_StagingBuffer);
		m_StagingBufferFirst = 0;
		m_StagingBufferCurrentFirst = 0;
		m_StagingBufferLast = requiredSize;

		for (RingItem& item : m_Ring)
		{
			item.stagingBufferFirst = 0;
			item.stagingBufferLast = 0;
		}

		return 0;
	}
	else if (needsResize)
	{
		// If we can't resize, we need to wait until the scheduled uploads
		// have completed.
		for (size_t ringIndexOffset = 1;
			ringIndexOffset < m_Ring.size() &&
				GetFreeSpaceOffset(requiredSize, requiredAlignment) == INVALID_OFFSET;
			++ringIndexOffset)
		{
			const size_t ringIndex = (m_RingIndex + ringIndexOffset) % m_Ring.size();
			RingItem& item = m_Ring[ringIndex];
			WaitUntilFree(item);
		}

		// If we still don't have free space, we need to flush the current
		// command buffer.
		const uint32_t offset = GetFreeSpaceOffset(requiredSize, requiredAlignment);
		if (offset == INVALID_OFFSET)
		{
			RingItem& item = m_Ring[m_RingIndex];
			if (item.isBegan)
				Flush();
			WaitUntilFree(item);
			m_StagingBufferFirst = 0;
			m_StagingBufferCurrentFirst = 0;
			m_StagingBufferLast = requiredSize;
			return 0;
		}
		else
		{
			m_StagingBufferLast = offset + requiredSize;
			return offset;
		}
	}
	else
	{
		m_StagingBufferLast = offsetCandidate + requiredSize;
		return offsetCandidate;
	}
}

uint32_t CRingCommandContext::GetFreeSpaceOffset(
	const uint32_t requiredSize, const uint32_t requiredAlignment) const
{
	if (!m_StagingBuffer)
		return INVALID_OFFSET;
	const uint32_t candidateOffset = round_up(m_StagingBufferLast, requiredAlignment);
	const uint32_t candidateLast = candidateOffset + requiredSize;
	if (m_StagingBufferFirst <= m_StagingBufferLast)
	{
		if (candidateLast <= m_StagingBuffer->GetSize())
			return candidateOffset;
		// We intentionally use an exclusive comparison to avoid having to
		// distinguish a completely full staging buffer from a completely
		// empty one.
		else if (requiredSize < m_StagingBufferFirst)
			return 0; // We assume the first byte is always perfectly aligned.
		else
			return INVALID_OFFSET;
	}
	else
	{
		if (candidateLast < m_StagingBufferFirst)
			return candidateOffset;
		else
			return INVALID_OFFSET;
	}
}

} // namespace Vulkan

} // namespace Backend

} // namespace Renderer
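// A worked example of the wrap-around logic in GetFreeSpaceOffset, assuming
// a 16-byte staging buffer and 4-byte alignment: with first == 4 and
// last == 12, a 4-byte request fits at offset 12 (its end, 16, is exactly
// the buffer size); an 8-byte request does not fit at the end and, because
// 8 < first fails, yields INVALID_OFFSET. In the wrapped state first == 12,
// last == 4 (bytes [12, 16) and [0, 4) in use), a 4-byte request returns
// offset 4, while an 8-byte request would end at byte 12 and is rejected by
// the exclusive comparison. That exclusivity keeps first == last
// unambiguous: it always means empty, never full.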