Implement asynchronous transfer queues, thread guards on RenderingDevice. Add ubershaders and rework pipeline caches for Forward+ and Mobile.

- Implements asynchronous transfer queues from PR #87590.
- Adds ubershaders that can run with specialization constants specified as push constants.
- Pipelines with specialization constants can compile in the background.
- Adds monitoring for pipeline compilations.
- Materials and shaders can now be created asynchronously on background threads.
- Meshes that are loaded on background threads can also compile pipelines as part of the loading process.
This commit is contained in:
Dario
2024-03-15 14:13:31 -03:00
parent 1917bc3454
commit e2c6daf7ef
78 changed files with 5218 additions and 2544 deletions

View File

@ -34,6 +34,7 @@
#include "servers/rendering/renderer_canvas_render.h"
#include "servers/rendering/renderer_compositor.h"
#include "servers/rendering/renderer_rd/pipeline_cache_rd.h"
#include "servers/rendering/renderer_rd/pipeline_hash_map_rd.h"
#include "servers/rendering/renderer_rd/shaders/canvas.glsl.gen.h"
#include "servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl.gen.h"
#include "servers/rendering/renderer_rd/storage_rd/material_storage.h"
@ -57,12 +58,6 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
SHADER_VARIANT_PRIMITIVE_POINTS,
SHADER_VARIANT_ATTRIBUTES,
SHADER_VARIANT_ATTRIBUTES_POINTS,
SHADER_VARIANT_QUAD_LIGHT,
SHADER_VARIANT_NINEPATCH_LIGHT,
SHADER_VARIANT_PRIMITIVE_LIGHT,
SHADER_VARIANT_PRIMITIVE_POINTS_LIGHT,
SHADER_VARIANT_ATTRIBUTES_LIGHT,
SHADER_VARIANT_ATTRIBUTES_POINTS_LIGHT,
SHADER_VARIANT_MAX
};
@ -84,14 +79,14 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
FLAGS_NINEPATCH_V_MODE_SHIFT = 18,
FLAGS_LIGHT_COUNT_SHIFT = 20,
FLAGS_DEFAULT_NORMAL_MAP_USED = (1 << 26),
FLAGS_DEFAULT_SPECULAR_MAP_USED = (1 << 27),
FLAGS_DEFAULT_NORMAL_MAP_USED = (1 << 24),
FLAGS_DEFAULT_SPECULAR_MAP_USED = (1 << 25),
FLAGS_USE_MSDF = (1 << 28),
FLAGS_USE_LCD = (1 << 29),
FLAGS_USE_MSDF = (1 << 26),
FLAGS_USE_LCD = (1 << 27),
FLAGS_FLIP_H = (1 << 30),
FLAGS_FLIP_V = (1 << 31),
FLAGS_FLIP_H = (1 << 28),
FLAGS_FLIP_V = (1 << 29),
};
enum {
@ -118,76 +113,82 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
/**** SHADER ****/
/****************/
enum PipelineVariant {
PIPELINE_VARIANT_QUAD,
PIPELINE_VARIANT_NINEPATCH,
PIPELINE_VARIANT_PRIMITIVE_TRIANGLES,
PIPELINE_VARIANT_PRIMITIVE_LINES,
PIPELINE_VARIANT_PRIMITIVE_POINTS,
PIPELINE_VARIANT_ATTRIBUTE_TRIANGLES,
PIPELINE_VARIANT_ATTRIBUTE_TRIANGLE_STRIP,
PIPELINE_VARIANT_ATTRIBUTE_LINES,
PIPELINE_VARIANT_ATTRIBUTE_LINES_STRIP,
PIPELINE_VARIANT_ATTRIBUTE_POINTS,
PIPELINE_VARIANT_QUAD_LCD_BLEND,
PIPELINE_VARIANT_MAX
};
enum PipelineLightMode {
PIPELINE_LIGHT_MODE_DISABLED,
PIPELINE_LIGHT_MODE_ENABLED,
PIPELINE_LIGHT_MODE_MAX
// Shader specialization flags packed into a single 32-bit word.
// The anonymous union exposes the same storage two ways: through the named
// bitfields, or as the raw value `packed_0` (the form PipelineKey::hash() consumes).
struct ShaderSpecialization {
union {
struct {
// Single-bit flag; presumably selects the lit shader path — TODO confirm against the shader source.
uint32_t use_lighting : 1;
};
// Raw view of every flag bit declared above.
uint32_t packed_0;
};
};
struct PipelineVariants {
PipelineCacheRD variants[PIPELINE_LIGHT_MODE_MAX][PIPELINE_VARIANT_MAX];
// Key type for the PipelineHashMapRD held by CanvasShaderData (`pipeline_hash_map`).
// Every member defaults to an invalid/sentinel or zero value.
struct PipelineKey {
ShaderVariant variant = SHADER_VARIANT_MAX;
RD::FramebufferFormatID framebuffer_format_id = RD::INVALID_FORMAT_ID;
RD::VertexFormatID vertex_format_id = RD::INVALID_ID;
RD::RenderPrimitive render_primitive = RD::RENDER_PRIMITIVE_MAX;
ShaderSpecialization shader_specialization = {};
// NOTE(review): lcd_blend and ubershader look like 0/1 flags stored as uint32_t — confirm against callers.
uint32_t lcd_blend = 0;
uint32_t ubershader = 0;
// Returns a 32-bit hash of the key: each member is folded in, in declaration order,
// with hash_murmur3_one_32 and the result is finalized with hash_fmix32.
// Any member added to this struct must also be folded in here, or keys that differ
// only in that member will hash identically.
uint32_t hash() const {
uint32_t h = hash_murmur3_one_32(variant);
h = hash_murmur3_one_32(framebuffer_format_id, h);
h = hash_murmur3_one_32(vertex_format_id, h);
h = hash_murmur3_one_32(render_primitive, h);
// The specialization flags are hashed through their raw packed representation.
h = hash_murmur3_one_32(shader_specialization.packed_0, h);
h = hash_murmur3_one_32(lcd_blend, h);
h = hash_murmur3_one_32(ubershader, h);
return hash_fmix32(h);
}
};
struct {
CanvasShaderRD canvas_shader;
RID default_version;
RID default_version_rd_shader;
RID quad_index_buffer;
RID quad_index_array;
PipelineVariants pipeline_variants;
ShaderCompiler compiler;
} shader;
struct CanvasShaderData : public RendererRD::MaterialStorage::ShaderData {
enum BlendMode { //used internally
BLEND_MODE_MIX,
BLEND_MODE_ADD,
BLEND_MODE_SUB,
BLEND_MODE_MUL,
BLEND_MODE_PMALPHA,
BLEND_MODE_DISABLED,
};
bool valid = false;
RID version;
PipelineVariants pipeline_variants;
Vector<ShaderCompiler::GeneratedCode::Texture> texture_uniforms;
int blend_mode = 0;
Vector<uint32_t> ubo_offsets;
uint32_t ubo_size = 0;
String code;
RID version;
PipelineHashMapRD<PipelineKey, CanvasShaderData, void (CanvasShaderData::*)(PipelineKey)> pipeline_hash_map;
static const uint32_t VERTEX_INPUT_MASKS_SIZE = SHADER_VARIANT_MAX * 2;
std::atomic<uint64_t> vertex_input_masks[VERTEX_INPUT_MASKS_SIZE] = {};
bool uses_screen_texture = false;
bool uses_screen_texture_mipmaps = false;
bool uses_sdf = false;
bool uses_time = false;
void _clear_vertex_input_mask_cache();
void _create_pipeline(PipelineKey p_pipeline_key);
virtual void set_code(const String &p_Code);
virtual bool is_animated() const;
virtual bool casts_shadows() const;
virtual RS::ShaderNativeSourceCode get_native_source_code() const;
RID get_shader(ShaderVariant p_shader_variant, bool p_ubershader) const;
uint64_t get_vertex_input_mask(ShaderVariant p_shader_variant, bool p_ubershader);
bool is_valid() const;
CanvasShaderData() {}
CanvasShaderData();
virtual ~CanvasShaderData();
};
// Renderer-wide canvas shader state, stored in the single member `shader`.
struct {
// Data must be guaranteed to be erased before the rest on the destructor.
CanvasShaderData *default_version_data = nullptr;
CanvasShaderRD canvas_shader;
RID default_version_rd_shader;
RID quad_index_buffer;
RID quad_index_array;
ShaderCompiler compiler;
// One counter per RS::PipelineSource value (array is sized by RS::PIPELINE_SOURCE_MAX).
// NOTE(review): presumably the value reported by get_pipeline_compilations() — confirm in the .cpp.
uint32_t pipeline_compilations[RS::PIPELINE_SOURCE_MAX] = {};
// NOTE(review): presumably guards pipeline_compilations against concurrent updates — confirm in the .cpp.
Mutex mutex;
} shader;
RendererRD::MaterialStorage::ShaderData *_create_shader_func();
static RendererRD::MaterialStorage::ShaderData *_create_shader_funcs() {
return static_cast<RendererCanvasRenderRD *>(singleton)->_create_shader_func();
@ -364,7 +365,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
struct PushConstant {
uint32_t base_instance_index;
uint32_t pad1;
ShaderSpecialization shader_specialization;
uint32_t pad2;
uint32_t pad3;
};
@ -448,11 +449,12 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
RID material;
CanvasMaterialData *material_data = nullptr;
PipelineLightMode light_mode = PipelineLightMode::PIPELINE_LIGHT_MODE_DISABLED;
PipelineVariant pipeline_variant = PipelineVariant::PIPELINE_VARIANT_QUAD;
const Item::Command *command = nullptr;
Item::Command::Type command_type = Item::Command::TYPE_ANIMATION_SLICE; // Can default to any type that doesn't form a batch.
ShaderVariant shader_variant = SHADER_VARIANT_QUAD;
RD::RenderPrimitive render_primitive = RD::RENDER_PRIMITIVE_TRIANGLES;
bool use_lighting = false;
// batch-specific data
union {
@ -552,9 +554,10 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
uint32_t base_flags = 0;
};
inline RID _get_pipeline_specialization_or_ubershader(CanvasShaderData *p_shader_data, PipelineKey &r_pipeline_key, PushConstant &r_push_constant, RID p_mesh_instance = RID(), void *p_surface = nullptr, uint32_t p_surface_index = 0, RID *r_vertex_array = nullptr);
void _render_batch_items(RenderTarget p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool &r_sdf_used, bool p_to_backbuffer = false, RenderingMethod::RenderInfo *r_render_info = nullptr);
void _record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used, Batch *&r_current_batch);
void _render_batch(RD::DrawListID p_draw_list, PipelineVariants *p_pipeline_variants, RenderingDevice::FramebufferFormatID p_framebuffer_format, Light *p_lights, Batch const *p_batch, RenderingMethod::RenderInfo *r_render_info = nullptr);
void _render_batch(RD::DrawListID p_draw_list, CanvasShaderData *p_shader_data, RenderingDevice::FramebufferFormatID p_framebuffer_format, Light *p_lights, Batch const *p_batch, RenderingMethod::RenderInfo *r_render_info = nullptr);
void _prepare_batch_texture_info(Batch *p_current_batch, RID p_texture) const;
[[nodiscard]] Batch *_new_batch(bool &r_batch_broken);
void _add_to_batch(uint32_t &r_index, bool &r_batch_broken, Batch *&r_current_batch);
@ -589,6 +592,7 @@ public:
virtual void set_shadow_texture_size(int p_size) override;
void set_debug_redraw(bool p_enabled, double p_time, const Color &p_color) override;
uint32_t get_pipeline_compilations(RS::PipelineSource p_source) override;
void set_time(double p_time);
void update() override;