diff --git a/release/datafiles/userdef/userdef_default.c b/release/datafiles/userdef/userdef_default.c index e81d5a32c26..dbf9f37a22e 100644 --- a/release/datafiles/userdef/userdef_default.c +++ b/release/datafiles/userdef/userdef_default.c @@ -116,7 +116,8 @@ const UserDef U_default = { #else .gpu_backend = GPU_BACKEND_OPENGL, #endif - .max_shader_compilation_subprocesses = 0, + .gpu_shader_workers = 0, + .shader_compilation_method = USER_SHADER_COMPILE_THREAD, /** Initialized by: #BKE_studiolight_default. */ .light_param = {{0}}, diff --git a/scripts/modules/rna_manual_reference.py b/scripts/modules/rna_manual_reference.py index 56cec1aa308..e1250f32156 100644 --- a/scripts/modules/rna_manual_reference.py +++ b/scripts/modules/rna_manual_reference.py @@ -30,7 +30,6 @@ url_manual_mapping = ( ("bpy.types.clothcollisionsettings.vertex_group_object_collisions*", "physics/cloth/settings/collisions.html#bpy-types-clothcollisionsettings-vertex-group-object-collisions"), ("bpy.types.gpencilsculptsettings.use_automasking_material_active*", "grease_pencil/modes/sculpting/introduction.html#bpy-types-gpencilsculptsettings-use-automasking-material-active"), ("bpy.types.gpencilsculptsettings.use_automasking_material_stroke*", "grease_pencil/modes/sculpting/introduction.html#bpy-types-gpencilsculptsettings-use-automasking-material-stroke"), - ("bpy.types.preferencessystem.max_shader_compilation_subprocesses*", "editors/preferences/system.html#bpy-types-preferencessystem-max-shader-compilation-subprocesses"), ("bpy.types.cycleslightsettings.use_multiple_importance_sampling*", "render/cycles/light_settings.html#bpy-types-cycleslightsettings-use-multiple-importance-sampling"), ("bpy.types.fluiddomainsettings.sndparticle_potential_max_energy*", "physics/fluid/type/domain/liquid/particles.html#bpy-types-fluiddomainsettings-sndparticle-potential-max-energy"), ("bpy.types.fluiddomainsettings.sndparticle_potential_min_energy*", 
"physics/fluid/type/domain/liquid/particles.html#bpy-types-fluiddomainsettings-sndparticle-potential-min-energy"), @@ -80,6 +79,7 @@ url_manual_mapping = ( ("bpy.types.preferencesedit.grease_pencil_euclidean_distance*", "editors/preferences/editing.html#bpy-types-preferencesedit-grease-pencil-euclidean-distance"), ("bpy.types.preferencesedit.grease_pencil_manhattan_distance*", "editors/preferences/editing.html#bpy-types-preferencesedit-grease-pencil-manhattan-distance"), ("bpy.types.preferencesinput.mouse_emulate_3_button_modifier*", "editors/preferences/input.html#bpy-types-preferencesinput-mouse-emulate-3-button-modifier"), + ("bpy.types.preferencessystem.max_shader_compilation_workers*", "editors/preferences/system.html#bpy-types-preferencessystem-max-shader-compilation-workers"), ("bpy.types.brushgpencilsettings.use_stroke_random_strength*", "grease_pencil/modes/draw/brushes/draw.html#bpy-types-brushgpencilsettings-use-stroke-random-strength"), ("bpy.types.clothsettings.vertex_group_structural_stiffness*", "physics/cloth/settings/property_weights.html#bpy-types-clothsettings-vertex-group-structural-stiffness"), ("bpy.types.cyclesrendersettings.film_transparent_roughness*", "render/cycles/render_settings/film.html#bpy-types-cyclesrendersettings-film-transparent-roughness"), diff --git a/scripts/startup/bl_ui/space_userpref.py b/scripts/startup/bl_ui/space_userpref.py index 9952ca2498f..71e2e251a28 100644 --- a/scripts/startup/bl_ui/space_userpref.py +++ b/scripts/startup/bl_ui/space_userpref.py @@ -809,8 +809,11 @@ class USERPREF_PT_system_memory(SystemPanel, CenterAlignMixIn, Panel): if sys.platform != "darwin": layout.separator() - col = layout.column() - col.prop(system, "max_shader_compilation_subprocesses") + col = layout.column(align=True) + col.active = system.gpu_backend != 'VULKAN' + col.row().prop(system, "shader_compilation_method", expand=True) + label = "Threads" if system.shader_compilation_method == 'THREAD' else "Subprocesses" + col.prop(system, 
"gpu_shader_workers", text=label) class USERPREF_PT_system_video_sequencer(SystemPanel, CenterAlignMixIn, Panel): diff --git a/source/blender/blenkernel/BKE_blender_version.h b/source/blender/blenkernel/BKE_blender_version.h index c3c1ece4fa9..9fd1a0c3f44 100644 --- a/source/blender/blenkernel/BKE_blender_version.h +++ b/source/blender/blenkernel/BKE_blender_version.h @@ -27,7 +27,7 @@ /* Blender file format version. */ #define BLENDER_FILE_VERSION BLENDER_VERSION -#define BLENDER_FILE_SUBVERSION 85 +#define BLENDER_FILE_SUBVERSION 86 /* Minimum Blender version that supports reading file written with the current * version. Older Blender versions will test this and cancel loading the file, showing a warning to diff --git a/source/blender/blenloader/intern/versioning_userdef.cc b/source/blender/blenloader/intern/versioning_userdef.cc index afa4b9197b4..ba1d39df4b6 100644 --- a/source/blender/blenloader/intern/versioning_userdef.cc +++ b/source/blender/blenloader/intern/versioning_userdef.cc @@ -1496,6 +1496,12 @@ void blo_do_versions_userdef(UserDef *userdef) } } + if (!USER_VERSION_ATLEAST(405, 86)) { + if (userdef->gpu_shader_workers > 0) { + userdef->shader_compilation_method = USER_SHADER_COMPILE_SUBPROCESS; + } + } + /** * Always bump subversion in BKE_blender_version.h when adding versioning * code here, and wrap it inside a USER_VERSION_ATLEAST check. 
diff --git a/source/blender/draw/engines/eevee/eevee_instance.cc b/source/blender/draw/engines/eevee/eevee_instance.cc index 509c49ce817..d566e1870e8 100644 --- a/source/blender/draw/engines/eevee/eevee_instance.cc +++ b/source/blender/draw/engines/eevee/eevee_instance.cc @@ -722,11 +722,14 @@ void Instance::draw_viewport() } if (materials.queued_shaders_count > 0) { info_append_i18n("Compiling shaders ({} remaining)", materials.queued_shaders_count); - if (!GPU_use_parallel_compilation() && - GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) + if (GPU_backend_get_type() == GPU_BACKEND_OPENGL && !GPU_use_subprocess_compilation() && + /* Only recommend subprocesses when there is known gain. */ + (GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_ANY) || + GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_WIN, GPU_DRIVER_ANY) || + GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_OFFICIAL))) { info_append_i18n( - "Increasing Preferences > System > Max Shader Compilation Subprocesses may improve " + "Setting Preferences > System > Shader Compilation Method to Subprocess might improve " "compilation time."); } } diff --git a/source/blender/gpu/GPU_capabilities.hh b/source/blender/gpu/GPU_capabilities.hh index f8a36498127..62bb7ab6af6 100644 --- a/source/blender/gpu/GPU_capabilities.hh +++ b/source/blender/gpu/GPU_capabilities.hh @@ -43,7 +43,7 @@ const char *GPU_extension_get(int i); int GPU_texture_size_with_limit(int res); -bool GPU_use_parallel_compilation(); +bool GPU_use_subprocess_compilation(); int GPU_max_parallel_compilations(); bool GPU_stencil_clasify_buffer_workaround(); diff --git a/source/blender/gpu/intern/gpu_capabilities.cc b/source/blender/gpu/intern/gpu_capabilities.cc index d460d1743da..83e89a85070 100644 --- a/source/blender/gpu/intern/gpu_capabilities.cc +++ b/source/blender/gpu/intern/gpu_capabilities.cc @@ -131,9 +131,9 @@ int GPU_max_samplers() return GCaps.max_samplers; } -bool 
GPU_use_parallel_compilation() +bool GPU_use_subprocess_compilation() { - return GCaps.max_parallel_compilations > 0; + return GCaps.use_subprocess_shader_compilations; } int GPU_max_parallel_compilations() diff --git a/source/blender/gpu/intern/gpu_capabilities_private.hh b/source/blender/gpu/intern/gpu_capabilities_private.hh index 6d5c887efb2..982c009fb9b 100644 --- a/source/blender/gpu/intern/gpu_capabilities_private.hh +++ b/source/blender/gpu/intern/gpu_capabilities_private.hh @@ -63,6 +63,8 @@ struct GPUCapabilities { bool node_link_instancing_workaround = false; bool line_directive_workaround = false; + bool use_subprocess_shader_compilations = false; + /* Vulkan related workarounds. */ bool render_pass_workaround = false; diff --git a/source/blender/gpu/intern/gpu_pass.cc b/source/blender/gpu/intern/gpu_pass.cc index 7845127833e..f78ab33cc26 100644 --- a/source/blender/gpu/intern/gpu_pass.cc +++ b/source/blender/gpu/intern/gpu_pass.cc @@ -165,9 +165,8 @@ bool GPU_pass_should_optimize(GPUPass *pass) return (GPU_backend_get_type() == GPU_BACKEND_METAL) && pass->should_optimize; #if 0 - /* Returns optimization heuristic prepared during initial codegen. - * NOTE: Optimization limited to parallel compilation as it causes CPU stalls otherwise. */ - return pass->should_optimize && GPU_use_parallel_compilation(); + /* Returns optimization heuristic prepared during initial codegen. 
*/ + return pass->should_optimize; #endif } diff --git a/source/blender/gpu/intern/gpu_shader_create_info.cc b/source/blender/gpu/intern/gpu_shader_create_info.cc index 6e10d388888..0bcd518a7c3 100644 --- a/source/blender/gpu/intern/gpu_shader_create_info.cc +++ b/source/blender/gpu/intern/gpu_shader_create_info.cc @@ -574,16 +574,8 @@ bool gpu_shader_create_info_compile(const char *name_starts_with_filter) } } - Vector result; - if (GPU_use_parallel_compilation() == false) { - for (const GPUShaderCreateInfo *info : infos) { - result.append(GPU_shader_create_from_info(info)); - } - } - else { - BatchHandle batch = GPU_shader_batch_create_from_infos(infos); - result = GPU_shader_batch_finalize(batch); - } + BatchHandle batch = GPU_shader_batch_create_from_infos(infos); + Vector result = GPU_shader_batch_finalize(batch); for (int i : result.index_range()) { const ShaderCreateInfo *info = reinterpret_cast(infos[i]); diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm index 5c1bed55b60..78d40b466a8 100644 --- a/source/blender/gpu/metal/mtl_shader.mm +++ b/source/blender/gpu/metal/mtl_shader.mm @@ -1551,7 +1551,6 @@ MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state( MTLShaderCompiler::MTLShaderCompiler() : ShaderCompiler(GPU_max_parallel_compilations(), GPUWorker::ContextType::PerThread, true) { - BLI_assert(GPU_use_parallel_compilation()); } Shader *MTLShaderCompiler::compile_shader(const shader::ShaderCreateInfo &info) diff --git a/source/blender/gpu/opengl/gl_backend.cc b/source/blender/gpu/opengl/gl_backend.cc index c0d6b17d219..e2d7ebbfe94 100644 --- a/source/blender/gpu/opengl/gl_backend.cc +++ b/source/blender/gpu/opengl/gl_backend.cc @@ -723,17 +723,66 @@ void GLBackend::capabilities_init() detect_workarounds(); #if BLI_SUBPROCESS_SUPPORT - if (GCaps.max_parallel_compilations == -1) { - GCaps.max_parallel_compilations = std::min(int(U.max_shader_compilation_subprocesses), - 
BLI_system_thread_count()); - } + GCaps.use_subprocess_shader_compilations = U.shader_compilation_method == + USER_SHADER_COMPILE_SUBPROCESS; +#else + GCaps.use_subprocess_shader_compilations = false; +#endif if (G.debug & G_DEBUG_GPU_RENDERDOC) { /* Avoid crashes on RenderDoc sessions. */ - GCaps.max_parallel_compilations = 0; + GCaps.use_subprocess_shader_compilations = false; } -#else - GCaps.max_parallel_compilations = 0; -#endif + + int thread_count = U.gpu_shader_workers; + + if (thread_count == 0) { + /* Good default based on measurements. */ + + /* Always have at least 1 worker. */ + thread_count = 1; + + if (GCaps.use_subprocess_shader_compilations) { + /* Use a reasonable number of workers by default when there are known gains. */ + if (GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_OFFICIAL) || + GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_OFFICIAL) || + GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_WIN, GPU_DRIVER_ANY)) + { + /* Subprocess is too costly in memory (>150MB per worker) to have better defaults. */ + thread_count = std::max(1, std::min(4, BLI_system_thread_count() / 2)); + } + } + else if (GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_OFFICIAL)) { + /* Best middle ground between memory usage and speedup as Nvidia context memory footprint + * is quite heavy (~25MB). Moreover, we get diminishing returns after this because of PSO + * compilation blocking the main thread. + * Can be revisited if we find a way to delete the worker thread context after finishing + * compilation, and fix the scheduling bubbles (#139775). */ + thread_count = 4; + } + else if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_OPENSOURCE) || + GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_UNIX, GPU_DRIVER_ANY)) + { + /* Mesa has very good compilation time and doesn't block the main thread. + * The memory footprint of the worker context is rather small (<10MB). + * Shader compilation gets much slower as the number of threads increases. 
*/ + thread_count = 8; + } + else if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_OFFICIAL)) { + /* The AMD proprietary driver's contexts have a huge memory footprint (~45MB). + * There is also not much gain from parallelization. */ + thread_count = 1; + } + else if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_WIN, GPU_DRIVER_ANY)) { + /* The Intel Windows driver offers almost no speedup with parallel compilation. */ + thread_count = 1; + } + } + + /* Allow thread count override option to limit the number of workers and avoid allocating more + * workers than needed. Also ensures that there is always 1 thread available for the UI. */ + int max_thread_count = std::max(1, BLI_system_thread_count() - 1); + + GCaps.max_parallel_compilations = std::min(thread_count, max_thread_count); /* Disable this feature entirely when not debugging. */ if ((G.debug & G_DEBUG_GPU) == 0) { diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh index 2fe45d0fa66..9313d1f87cf 100644 --- a/source/blender/gpu/opengl/gl_backend.hh +++ b/source/blender/gpu/opengl/gl_backend.hh @@ -9,10 +9,15 @@ #pragma once #include "GPU_capabilities.hh" +#include "GPU_platform.hh" + #include "gpu_backend.hh" +#include "BLI_threads.h" #include "BLI_vector.hh" +#include "gpu_capabilities_private.hh" + #ifdef WITH_RENDERDOC # include "renderdoc_api.hh" #endif @@ -56,7 +61,7 @@ class GLBackend : public GPUBackend { void init_resources() override { - if (GPU_use_parallel_compilation()) { + if (GCaps.use_subprocess_shader_compilations) { compiler_ = MEM_new(__func__); } else { diff --git a/source/blender/gpu/opengl/gl_shader.hh b/source/blender/gpu/opengl/gl_shader.hh index 2d1257c2380..1c6de5daa66 100644 --- a/source/blender/gpu/opengl/gl_shader.hh +++ b/source/blender/gpu/opengl/gl_shader.hh @@ -198,8 +198,8 @@ class GLShader : public Shader { class GLShaderCompiler : public ShaderCompiler { public: - GLShaderCompiler(uint32_t threads_count = 1) - : 
ShaderCompiler(threads_count, GPUWorker::ContextType::PerThread, true){}; + GLShaderCompiler() + : ShaderCompiler(GPU_max_parallel_compilations(), GPUWorker::ContextType::PerThread, true){}; virtual void specialize_shader(ShaderSpecialization &specialization) override; }; diff --git a/source/blender/makesdna/DNA_userdef_types.h b/source/blender/makesdna/DNA_userdef_types.h index 13b1600c4c8..71a2c1b5fcf 100644 --- a/source/blender/makesdna/DNA_userdef_types.h +++ b/source/blender/makesdna/DNA_userdef_types.h @@ -487,13 +487,17 @@ typedef struct UserDef { int gpu_preferred_index; uint32_t gpu_preferred_vendor_id; uint32_t gpu_preferred_device_id; - char _pad16[4]; + + /** Max number of parallel shader compilation workers. */ + short gpu_shader_workers; + /** eUserpref_ShaderCompileMethod (OpenGL only). */ + short shader_compilation_method; + + char _pad16[2]; + /** #eGPUBackendType */ short gpu_backend; - /** Max number of parallel shader compilation subprocesses. */ - short max_shader_compilation_subprocesses; - /** Number of samples for FPS display calculations. */ short playback_fps_samples; @@ -1125,6 +1129,11 @@ typedef enum eUserpref_SeqEditorFlags { USER_SEQ_ED_CONNECT_STRIPS_BY_DEFAULT = (1 << 1), } eUserpref_SeqEditorFlags; +typedef enum eUserpref_ShaderCompileMethod { + USER_SHADER_COMPILE_THREAD = 0, + USER_SHADER_COMPILE_SUBPROCESS = 1, +} eUserpref_ShaderCompileMethod; + /* Locale Ids. Auto will try to get local from OS. Our default is English though. 
*/ /** #UserDef.language */ enum { diff --git a/source/blender/makesdna/intern/dna_rename_defs.h b/source/blender/makesdna/intern/dna_rename_defs.h index 40e0553b2cc..690194786d7 100644 --- a/source/blender/makesdna/intern/dna_rename_defs.h +++ b/source/blender/makesdna/intern/dna_rename_defs.h @@ -215,6 +215,7 @@ DNA_STRUCT_RENAME_MEMBER(UVProjectModifierData, num_projectors, projectors_num) DNA_STRUCT_RENAME_MEMBER(UserDef, autokey_flag, keying_flag) DNA_STRUCT_RENAME_MEMBER(UserDef, gp_manhattendist, gp_manhattandist) DNA_STRUCT_RENAME_MEMBER(UserDef, pythondir, pythondir_legacy) +DNA_STRUCT_RENAME_MEMBER(UserDef, max_shader_compilation_subprocesses, gpu_shader_workers) DNA_STRUCT_RENAME_MEMBER(VFont, name, filepath) DNA_STRUCT_RENAME_MEMBER(View3D, far, clip_end) DNA_STRUCT_RENAME_MEMBER(View3D, local_collections_uuid, local_collections_uid) diff --git a/source/blender/makesrna/intern/rna_userdef.cc b/source/blender/makesrna/intern/rna_userdef.cc index c2543906a96..e7e0aa9927c 100644 --- a/source/blender/makesrna/intern/rna_userdef.cc +++ b/source/blender/makesrna/intern/rna_userdef.cc @@ -6349,14 +6349,36 @@ static void rna_def_userdef_system(BlenderRNA *brna) "Preferred device to select during detection (requires restarting " "Blender for changes to take effect)"); - prop = RNA_def_property(srna, "max_shader_compilation_subprocesses", PROP_INT, PROP_NONE); - RNA_def_property_range(prop, 0, INT16_MAX); + prop = RNA_def_property(srna, "gpu_shader_workers", PROP_INT, PROP_NONE); + RNA_def_property_range(prop, 0, 32); RNA_def_property_ui_text(prop, - "Max Shader Compilation Subprocesses", - "Max number of parallel shader compilation subprocesses, " + "Shader Compilation Workers", + "Number of shader compilation threads or subprocesses, " "clamped at the max threads supported by the CPU " "(requires restarting Blender for changes to take effect). 
" - "Setting it to 0 disables subprocess shader compilation."); + "A higher number increases the RAM usage while reducing " + "compilation time. A value of 0 will use automatic configuration. " + "(OpenGL only)"); + + static const EnumPropertyItem shader_compilation_method_items[] = { + {USER_SHADER_COMPILE_THREAD, "THREAD", 0, "Thread", "Use threads for compiling shaders"}, + {USER_SHADER_COMPILE_SUBPROCESS, + "SUBPROCESS", + 0, + "Subprocess", + "Use subprocesses for compiling shaders"}, + {0, nullptr, 0, nullptr, nullptr}, + }; + + prop = RNA_def_property(srna, "shader_compilation_method", PROP_ENUM, PROP_NONE); + RNA_def_property_enum_items(prop, shader_compilation_method_items); + RNA_def_property_ui_text(prop, + "Shader Compilation Method", + "Compilation method used for compiling shaders in parallel. " + "Subprocess requires a lot more RAM for each worker " + "but might compile shaders faster on some systems. " + "Requires restarting Blender for changes to take effect. " + "(OpenGL only)"); /* Network. */