/* Copyright (C) 2023 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/
#include "precompiled.h"
#include "graphics/Color.h"
#include "graphics/LightEnv.h"
#include "graphics/Material.h"
#include "graphics/Model.h"
#include "graphics/ModelDef.h"
#include "graphics/ShaderManager.h"
#include "graphics/TextureManager.h"
#include "lib/allocators/DynamicArena.h"
#include "lib/allocators/STLAllocators.h"
#include "lib/hash.h"
#include "maths/Vector3D.h"
#include "maths/Vector4D.h"
#include "ps/CLogger.h"
#include "ps/containers/Span.h"
#include "ps/CStrInternStatic.h"
#include "ps/Profile.h"
#include "renderer/MikktspaceWrap.h"
#include "renderer/ModelRenderer.h"
#include "renderer/ModelVertexRenderer.h"
#include "renderer/Renderer.h"
#include "renderer/RenderModifiers.h"
#include "renderer/SceneRenderer.h"
#include "renderer/SkyManager.h"
#include "renderer/TimeManager.h"
#include "renderer/WaterManager.h"
///////////////////////////////////////////////////////////////////////////////////////////////
// ModelRenderer implementation
void ModelRenderer::Init()
{
}
// Helper function to copy object-space position and normal vectors into arrays.
void ModelRenderer::CopyPositionAndNormals(
const CModelDefPtr& mdef,
const VertexArrayIterator<CVector3D>& Position,
const VertexArrayIterator<CVector3D>& Normal)
{
size_t numVertices = mdef->GetNumVertices();
SModelVertex* vertices = mdef->GetVertices();
for (size_t j = 0; j < numVertices; ++j)
{
Position[j] = vertices[j].m_Coords;
Normal[j] = vertices[j].m_Norm;
}
}
// Helper function to transform position and normal vectors into world-space.
void ModelRenderer::BuildPositionAndNormals(
CModel* model,
const VertexArrayIterator<CVector3D>& Position,
const VertexArrayIterator<CVector3D>& Normal)
{
CModelDefPtr mdef = model->GetModelDef();
size_t numVertices = mdef->GetNumVertices();
SModelVertex* vertices = mdef->GetVertices();
if (model->IsSkinned())
{
// boned model - calculate skinned vertex positions/normals
// Avoid the noisy warnings that occur inside SkinPoint/SkinNormal in
// some broken situations
if (numVertices && vertices[0].m_Blend.m_Bone[0] == 0xff)
{
LOGERROR("Model %s is boned with unboned animation", mdef->GetName().string8());
return;
}
CModelDef::SkinPointsAndNormals(numVertices, Position, Normal, vertices, mdef->GetBlendIndices(), model->GetAnimatedBoneMatrices());
}
else
{
PROFILE("software transform");
// just copy regular positions, transform normals to world space
const CMatrix3D& transform = model->GetTransform();
const CMatrix3D& invtransform = model->GetInvTransform();
for (size_t j = 0; j < numVertices; ++j)
{
transform.Transform(vertices[j].m_Coords, Position[j]);
invtransform.RotateTransposed(vertices[j].m_Norm, Normal[j]);
}
}
}
// Helper function for lighting
void ModelRenderer::BuildColor4ub(
CModel* model,
const VertexArrayIterator<CVector3D>& Normal,
const VertexArrayIterator<SColor4ub>& Color)
{
PROFILE("lighting vertices");
CModelDefPtr mdef = model->GetModelDef();
size_t numVertices = mdef->GetNumVertices();
const CLightEnv& lightEnv = g_Renderer.GetSceneRenderer().GetLightEnv();
CColor shadingColor = model->GetShadingColor();
for (size_t j = 0; j < numVertices; ++j)
{
RGBColor tempcolor = lightEnv.EvaluateUnitScaled(Normal[j]);
tempcolor.X *= shadingColor.r;
tempcolor.Y *= shadingColor.g;
tempcolor.Z *= shadingColor.b;
Color[j] = ConvertRGBColorTo4ub(tempcolor);
}
}
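// Compute per-vertex tangents for the model definition via the MikkTSpace wrapper.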
void ModelRenderer::GenTangents(const CModelDefPtr& mdef, std::vector<float>& newVertices, bool gpuSkinning)
{
MikkTSpace ms(mdef, newVertices, gpuSkinning);
ms.Generate();
}
// Copy UV coordinates
void ModelRenderer::BuildUV(
const CModelDefPtr& mdef,
const VertexArrayIterator<float[2]>& UV,
int UVset)
{
const size_t numVertices = mdef->GetNumVertices();
const size_t numberOfUVPerVertex = mdef->GetNumUVsPerVertex();
for (size_t j = 0; j < numVertices; ++j)
{
const CVector2D& uv = mdef->GetUVCoordinates()[j * numberOfUVPerVertex + UVset];
UV[j][0] = uv.X;
UV[j][1] = 1.0 - uv.Y;
}
}
// Build default indices array.
void ModelRenderer::BuildIndices(
const CModelDefPtr& mdef,
const VertexArrayIterator<u16>& Indices)
{
size_t idxidx = 0;
SModelFace* faces = mdef->GetFaces();
for (size_t j = 0; j < mdef->GetNumFaces(); ++j)
{
SModelFace& face = faces[j];
Indices[idxidx++] = face.m_Verts[0];
Indices[idxidx++] = face.m_Verts[1];
Indices[idxidx++] = face.m_Verts[2];
}
}
///////////////////////////////////////////////////////////////////////////////////////////////
// ShaderModelRenderer implementation
/**
* Internal data of the ShaderModelRenderer.
*
* Separated into the source file to increase implementation hiding (and to
* avoid some causes of recompiles).
*/
struct ShaderModelRenderer::ShaderModelRendererInternals
{
ShaderModelRendererInternals(ShaderModelRenderer* r) : m_Renderer(r) { }
/// Back-link to "our" renderer
ShaderModelRenderer* m_Renderer;
/// ModelVertexRenderer used for vertex transformations
ModelVertexRendererPtr vertexRenderer;
/// List of submitted models for rendering in this frame
std::vector<CModel*> submissions[CSceneRenderer::CULL_MAX];
};
// Construction/Destruction
ShaderModelRenderer::ShaderModelRenderer(ModelVertexRendererPtr vertexrenderer)
{
m = new ShaderModelRendererInternals(this);
m->vertexRenderer = vertexrenderer;
}
ShaderModelRenderer::~ShaderModelRenderer()
{
delete m;
}
// Submit one model.
void ShaderModelRenderer::Submit(int cullGroup, CModel* model)
{
CModelRData* rdata = (CModelRData*)model->GetRenderData();
// Ensure model data is valid
const void* key = m->vertexRenderer.get();
if (!rdata || rdata->GetKey() != key)
{
rdata = m->vertexRenderer->CreateModelData(key, model);
model->SetRenderData(rdata);
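// Flag everything as dirty so the freshly created render data is fully built in PrepareModels().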
model->SetDirty(~0u);
}
m->submissions[cullGroup].push_back(model);
}
// Call update for all submitted models and enter the rendering phase
void ShaderModelRenderer::PrepareModels()
{
for (int cullGroup = 0; cullGroup < CSceneRenderer::CULL_MAX; ++cullGroup)
{
for (CModel* model : m->submissions[cullGroup])
{
model->ValidatePosition();
CModelRData* rdata = static_cast<CModelRData*>(model->GetRenderData());
ENSURE(rdata->GetKey() == m->vertexRenderer.get());
m->vertexRenderer->UpdateModelData(model, rdata, rdata->m_UpdateFlags);
rdata->m_UpdateFlags = 0;
}
}
}
void ShaderModelRenderer::UploadModels(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
{
for (int cullGroup = 0; cullGroup < CSceneRenderer::CULL_MAX; ++cullGroup)
{
for (CModel* model : m->submissions[cullGroup])
{
CModelRData* rdata = static_cast<CModelRData*>(model->GetRenderData());
ENSURE(rdata->GetKey() == m->vertexRenderer.get());
m->vertexRenderer->UploadModelData(deviceCommandContext, model, rdata);
}
}
}
// Clear the submissions list
void ShaderModelRenderer::EndFrame()
{
for (int cullGroup = 0; cullGroup < CSceneRenderer::CULL_MAX; ++cullGroup)
m->submissions[cullGroup].clear();
}
// Helper structs for ShaderModelRenderer::Render():
struct SMRSortByDistItem
{
size_t techIdx;
CModel* model;
float dist;
};
struct SMRBatchModel
{
bool operator()(CModel* a, CModel* b)
{
if (a->GetModelDef() < b->GetModelDef())
return true;
if (b->GetModelDef() < a->GetModelDef())
return false;
if (a->GetMaterial().GetDiffuseTexture() < b->GetMaterial().GetDiffuseTexture())
return true;
if (b->GetMaterial().GetDiffuseTexture() < a->GetMaterial().GetDiffuseTexture())
return false;
return a->GetMaterial().GetStaticUniforms() < b->GetMaterial().GetStaticUniforms();
}
};
struct SMRCompareSortByDistItem
{
bool operator()(const SMRSortByDistItem& a, const SMRSortByDistItem& b)
{
// Prefer items with greater distance, so we draw back-to-front
return (a.dist > b.dist);
// (Distances will almost always be distinct, so we don't need to bother
// tie-breaking on modeldef/texture/etc)
}
};
class SMRMaterialBucketKey
{
public:
SMRMaterialBucketKey(CStrIntern effect, const CShaderDefines& defines)
: effect(effect), defines(defines) { }
SMRMaterialBucketKey(const SMRMaterialBucketKey& entity) = default;
CStrIntern effect;
CShaderDefines defines;
bool operator==(const SMRMaterialBucketKey& b) const
{
return (effect == b.effect && defines == b.defines);
}
private:
SMRMaterialBucketKey& operator=(const SMRMaterialBucketKey&);
};
struct SMRMaterialBucketKeyHash
{
size_t operator()(const SMRMaterialBucketKey& key) const
{
size_t hash = 0;
hash_combine(hash, key.effect.GetHash());
hash_combine(hash, key.defines.GetHash());
return hash;
}
};
struct SMRTechBucket
{
CShaderTechniquePtr tech;
PS::span<CModel* const> models;
// Model list is stored as pointers, not as a std::vector,
// so that sorting lists of this struct is fast
};
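// Orders tech buckets by technique pointer; the exact order doesn't matter,
// we only need buckets sharing a technique to end up adjacent.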
struct SMRCompareTechBucket
{
bool operator()(const SMRTechBucket& a, const SMRTechBucket& b)
{
return a.tech < b.tech;
}
};
void ShaderModelRenderer::Render(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
const RenderModifierPtr& modifier, const CShaderDefines& context, int cullGroup, int flags)
{
if (m->submissions[cullGroup].empty())
return;
CMatrix3D worldToCam;
g_Renderer.GetSceneRenderer().GetViewCamera().GetOrientation().GetInverse(worldToCam);
/*
* Rendering approach:
*
* m->submissions contains the list of CModels to render.
*
* The data we need to render a model is:
* - CShaderTechnique
* - CTexture
* - CShaderUniforms
* - CModelDef (mesh data)
* - CModel (model instance data)
*
* For efficient rendering, we need to batch the draw calls to minimise state changes.
* (Uniform and texture changes are assumed to be cheaper than binding new mesh data,
* and shader changes are assumed to be most expensive.)
* First, group all models that share a technique to render them together.
* Within those groups, sub-group by CModelDef.
* Within those sub-groups, sub-sub-group by CTexture.
* Within those sub-sub-groups, sub-sub-sub-group by CShaderUniforms.
*
* Alpha-blended models have to be sorted by distance from camera,
* then we can batch as long as the order is preserved.
* Non-alpha-blended models can be arbitrarily reordered to maximise batching.
*
* For each model, the CShaderTechnique is derived from:
* - The current global 'context' defines
* - The CModel's material's defines
* - The CModel's material's shader effect name
*
* There are a smallish number of materials, and a smaller number of techniques.
*
* To minimise technique lookups, we first group models by material,
* in 'materialBuckets' (a hash table).
*
* For each material bucket we then look up the appropriate shader technique.
* If the technique requires sort-by-distance, the model is added to the
* 'sortByDistItems' list with its computed distance.
* Otherwise, the bucket's list of models is sorted by modeldef+texture+uniforms,
* then the technique and model list is added to 'techBuckets'.
*
* 'techBuckets' is then sorted by technique, to improve batching when multiple
* materials map onto the same technique.
*
* (Note that this isn't perfect batching: we don't sort across models in
* multiple buckets that share a technique. In practice that shouldn't reduce
* batching much (we rarely have one mesh used with multiple materials),
* and it saves on copying and lets us sort smaller lists.)
*
* Extra tech buckets are added for the sorted-by-distance models without reordering.
* Finally we render by looping over each tech bucket, then looping over the model
* list in each, rebinding the GL state whenever it changes.
*/
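// All the temporary containers built below allocate from a single per-call
// bump arena, so this function avoids general-purpose heap traffic every frame.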
using Arena = Allocators::DynamicArena<256 * KiB>;
Arena arena;
using ModelListAllocator = ProxyAllocator<CModel*, Arena>;
using ModelList_t = std::vector<CModel*, ModelListAllocator>;
using MaterialBuckets_t = std::unordered_map<
SMRMaterialBucketKey,
ModelList_t,
SMRMaterialBucketKeyHash,
std::equal_to<SMRMaterialBucketKey>,
ProxyAllocator<
std::pair<const SMRMaterialBucketKey, ModelList_t>,
Arena> >;
MaterialBuckets_t materialBuckets((MaterialBuckets_t::allocator_type(arena)));
{
PROFILE3("bucketing by material");
for (size_t i = 0; i < m->submissions[cullGroup].size(); ++i)
{
CModel* model = m->submissions[cullGroup][i];
const CShaderDefines& defines = model->GetMaterial().GetShaderDefines();
SMRMaterialBucketKey key(model->GetMaterial().GetShaderEffect(), defines);
MaterialBuckets_t::iterator it = materialBuckets.find(key);
if (it == materialBuckets.end())
{
std::pair<MaterialBuckets_t::iterator, bool> inserted = materialBuckets.insert(
std::make_pair(key, ModelList_t(ModelList_t::allocator_type(arena))));
inserted.first->second.reserve(32);
inserted.first->second.push_back(model);
}
else
{
it->second.push_back(model);
}
}
}
using SortByDistItemsAllocator = ProxyAllocator<SMRSortByDistItem, Arena>;
std::vector<SMRSortByDistItem, SortByDistItemsAllocator> sortByDistItems((SortByDistItemsAllocator(arena)));
using SortByTechItemsAllocator = ProxyAllocator<CShaderTechniquePtr, Arena>;
std::vector<CShaderTechniquePtr, SortByTechItemsAllocator> sortByDistTechs((SortByTechItemsAllocator(arena)));
// indexed by sortByDistItems[i].techIdx
// (which stores indexes instead of CShaderTechniquePtr directly
// to avoid the shared_ptr copy cost when sorting; maybe it'd be better
// if we just stored raw CShaderTechnique* and assumed the shader manager
// will keep it alive long enough)
using TechBucketsAllocator = ProxyAllocator<SMRTechBucket, Arena>;
std::vector<SMRTechBucket, TechBucketsAllocator> techBuckets((TechBucketsAllocator(arena)));
{
PROFILE3("processing material buckets");
for (MaterialBuckets_t::iterator it = materialBuckets.begin(); it != materialBuckets.end(); ++it)
{
CShaderDefines defines = context;
defines.SetMany(it->first.defines);
CShaderTechniquePtr tech = g_Renderer.GetShaderManager().LoadEffect(it->first.effect, defines);
// Skip invalid techniques (e.g. from data file errors)
if (!tech)
continue;
if (tech->GetSortByDistance())
{
// Add the tech into a vector so we can index it
// (There might be duplicates in this list, but that doesn't really matter)
if (sortByDistTechs.empty() || sortByDistTechs.back() != tech)
sortByDistTechs.push_back(tech);
size_t techIdx = sortByDistTechs.size() - 1;
// Add each model into sortByDistItems
for (size_t i = 0; i < it->second.size(); ++i)
{
SMRSortByDistItem itemWithDist;
itemWithDist.techIdx = techIdx;
CModel* model = it->second[i];
itemWithDist.model = model;
CVector3D modelpos = model->GetTransform().GetTranslation();
itemWithDist.dist = worldToCam.Transform(modelpos).Z;
sortByDistItems.push_back(itemWithDist);
}
}
else
{
// Sort model list by modeldef+texture, for batching
// TODO: This only sorts by base texture. While this is an OK approximation
// for most cases (as related samplers are usually used together), it would be better
// to take all the samplers into account when sorting here.
std::sort(it->second.begin(), it->second.end(), SMRBatchModel());
// Add a tech bucket pointing at this model list
SMRTechBucket techBucket{tech, it->second};
techBuckets.push_back(techBucket);
}
}
}
{
PROFILE3("sorting tech buckets");
// Sort by technique, for better batching
std::sort(techBuckets.begin(), techBuckets.end(), SMRCompareTechBucket());
}
// List of models corresponding to sortByDistItems[i].model
// (This exists primarily because techBuckets wants a CModel**;
// we could avoid the cost of copying into this list by adding
// a stride length into techBuckets and not requiring contiguous CModel*s)
std::vector<CModel*, ModelListAllocator> sortByDistModels((ModelListAllocator(arena)));
if (!sortByDistItems.empty())
{
{
PROFILE3("sorting items by dist");
std::sort(sortByDistItems.begin(), sortByDistItems.end(), SMRCompareSortByDistItem());
}
{
PROFILE3("batching dist-sorted items");
sortByDistModels.reserve(sortByDistItems.size());
// Find runs of distance-sorted models that share a technique,
// and create a new tech bucket for each run
size_t start = 0; // start of current run
size_t currentTechIdx = sortByDistItems[start].techIdx;
for (size_t end = 0; end < sortByDistItems.size(); ++end)
{
sortByDistModels.push_back(sortByDistItems[end].model);
size_t techIdx = sortByDistItems[end].techIdx;
if (techIdx != currentTechIdx)
{
// Start of a new run - push the old run into a new tech bucket
SMRTechBucket techBucket{sortByDistTechs[currentTechIdx],
{sortByDistModels.data() + start, end - start}};
techBuckets.push_back(techBucket);
start = end;
currentTechIdx = techIdx;
}
}
// Add the tech bucket for the final run
SMRTechBucket techBucket{sortByDistTechs[currentTechIdx],
{sortByDistModels.data() + start, sortByDistItems.size() - start}};
techBuckets.push_back(techBucket);
}
}
const double time = g_Renderer.GetTimeManager().GetGlobalTime();
{
PROFILE3("rendering bucketed submissions");
size_t idxTechStart = 0;
// This vector keeps track of texture changes during rendering. It is kept outside the
// loops to avoid excessive reallocations. The token allocation of 64 elements
// should be plenty, though it is reallocated below (at a cost) if necessary.
using TextureListAllocator = ProxyAllocator<CTexture*, Arena>;
std::vector<CTexture*, TextureListAllocator> currentTexs((TextureListAllocator(arena)));
currentTexs.reserve(64);
// texBindings holds the identifier bindings in the shader, which can no longer be defined
// statically in the ShaderRenderModifier class. texBindingNames uses interned strings to
// keep track of when bindings need to be reevaluated.
using BindingListAllocator = ProxyAllocator<int32_t, Arena>;
std::vector<int32_t, BindingListAllocator> texBindings((BindingListAllocator(arena)));
texBindings.reserve(64);
using BindingNamesListAllocator = ProxyAllocator<CStrIntern, Arena>;
std::vector<CStrIntern, BindingNamesListAllocator> texBindingNames((BindingNamesListAllocator(arena)));
texBindingNames.reserve(64);
while (idxTechStart < techBuckets.size())
{
CShaderTechniquePtr currentTech = techBuckets[idxTechStart].tech;
// Find runs [idxTechStart, idxTechEnd) in techBuckets of the same technique
size_t idxTechEnd;
for (idxTechEnd = idxTechStart + 1; idxTechEnd < techBuckets.size(); ++idxTechEnd)
{
if (techBuckets[idxTechEnd].tech != currentTech)
break;
}
// For each of the technique's passes, render all the models in this run
for (int pass = 0; pass < currentTech->GetNumPasses(); ++pass)
{
deviceCommandContext->SetGraphicsPipelineState(
currentTech->GetGraphicsPipelineState(pass));
deviceCommandContext->BeginPass();
Renderer::Backend::IShaderProgram* shader = currentTech->GetShader(pass);
modifier->BeginPass(deviceCommandContext, shader);
// TODO: Use a more generic approach to handle bound queries.
bool boundTime = false;
bool boundWaterTexture = false;
bool boundSkyCube = false;
// When the shader technique changes, textures need to be
// rebound, so ensure there are no remnants from the last pass.
// (the vector size is set to 0, but memory is not freed)
currentTexs.clear();
texBindings.clear();
texBindingNames.clear();
CModelDef* currentModeldef = NULL;
CShaderUniforms currentStaticUniforms;
for (size_t idx = idxTechStart; idx < idxTechEnd; ++idx)
{
for (CModel* model : techBuckets[idx].models)
{
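// Honour the caller's flag filter (if any): skip models whose flags don't intersect it.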
if (flags && !(model->GetFlags() & flags))
continue;
const CMaterial::SamplersVector& samplers = model->GetMaterial().GetSamplers();
size_t samplersNum = samplers.size();
// make sure the vectors are the right virtual sizes, and also
// reallocate if there are more samplers than expected.
if (currentTexs.size() != samplersNum)
{
currentTexs.resize(samplersNum, NULL);
texBindings.resize(samplersNum, -1);
texBindingNames.resize(samplersNum, CStrIntern());
// ensure they are definitely empty
std::fill(texBindings.begin(), texBindings.end(), -1);
std::fill(currentTexs.begin(), currentTexs.end(), nullptr);
std::fill(texBindingNames.begin(), texBindingNames.end(), CStrIntern());
}
// bind the samplers to the shader
for (size_t s = 0; s < samplersNum; ++s)
{
const CMaterial::TextureSampler& samp = samplers[s];
// check that the handles are current
// and reevaluate them if necessary
if (texBindingNames[s] != samp.Name || texBindings[s] < 0)
{
texBindings[s] = shader->GetBindingSlot(samp.Name);
texBindingNames[s] = samp.Name;
}
// same with the actual sampler bindings
CTexture* newTex = samp.Sampler.get();
if (texBindings[s] >= 0 && newTex != currentTexs[s])
{
newTex->UploadBackendTextureIfNeeded(deviceCommandContext);
deviceCommandContext->SetTexture(
texBindings[s], newTex->GetBackendTexture());
currentTexs[s] = newTex;
}
}
// Bind modeldef when it changes
CModelDef* newModeldef = model->GetModelDef().get();
if (newModeldef != currentModeldef)
{
currentModeldef = newModeldef;
m->vertexRenderer->PrepareModelDef(deviceCommandContext, *currentModeldef);
}
// Bind all uniforms when any change
CShaderUniforms newStaticUniforms = model->GetMaterial().GetStaticUniforms();
if (newStaticUniforms != currentStaticUniforms)
{
currentStaticUniforms = newStaticUniforms;
currentStaticUniforms.BindUniforms(deviceCommandContext, shader);
}
const CShaderRenderQueries& renderQueries = model->GetMaterial().GetRenderQueries();
for (size_t q = 0; q < renderQueries.GetSize(); ++q)
{
CShaderRenderQueries::RenderQuery rq = renderQueries.GetItem(q);
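// Bind global inputs requested by the material (time, water texture, sky cube),
// each at most once per pass.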
if (rq.first == RQUERY_TIME)
{
if (!boundTime)
{
deviceCommandContext->SetUniform(
shader->GetBindingSlot(rq.second), time);
boundTime = true;
}
}
else if (rq.first == RQUERY_WATER_TEX)
{
if (!boundWaterTexture)
{
const double period = 1.6;
const WaterManager& waterManager = g_Renderer.GetSceneRenderer().GetWaterManager();
if (waterManager.m_RenderWater && waterManager.WillRenderFancyWater())
{
const CTexturePtr& waterTexture = waterManager.m_NormalMap[waterManager.GetCurrentTextureIndex(period)];
waterTexture->UploadBackendTextureIfNeeded(deviceCommandContext);
deviceCommandContext->SetTexture(
shader->GetBindingSlot(str_waterTex),
waterTexture->GetBackendTexture());
}
else
{
deviceCommandContext->SetTexture(
shader->GetBindingSlot(str_waterTex),
g_Renderer.GetTextureManager().GetErrorTexture()->GetBackendTexture());
}
boundWaterTexture = true;
}
}
else if (rq.first == RQUERY_SKY_CUBE)
{
if (!boundSkyCube)
{
deviceCommandContext->SetTexture(
shader->GetBindingSlot(str_skyCube),
g_Renderer.GetSceneRenderer().GetSkyManager().GetSkyCube());
boundSkyCube = true;
}
}
}
modifier->PrepareModel(deviceCommandContext, model);
CModelRData* rdata = static_cast<CModelRData*>(model->GetRenderData());
ENSURE(rdata->GetKey() == m->vertexRenderer.get());
m->vertexRenderer->RenderModel(deviceCommandContext, shader, model, rdata);
}
}
deviceCommandContext->EndPass();
}
idxTechStart = idxTechEnd;
}
}
}