From be1f9a878b733a9655b18767b8825baf67b4c8d8 Mon Sep 17 00:00:00 2001 From: Hugo Locurcio Date: Thu, 10 Mar 2022 01:40:31 +0100 Subject: [PATCH] Use SSE 4.2 as a baseline when compiling Godot This lets the compiler do more optimizations, leading to increased performance for demanding CPU tasks. --- SConstruct | 20 +++++++++++++++++--- modules/raycast/SCsub | 11 +++++++++-- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/SConstruct b/SConstruct index 1e76c431b79..fb6a7e6ffb2 100644 --- a/SConstruct +++ b/SConstruct @@ -731,11 +731,25 @@ elif env.msvc: ) Exit(255) -# Default architecture flags. -if env["arch"] == "x86_32": - if env.msvc: +# Set the x86 CPU instruction sets used by the compiler's autovectorization. +if env["arch"] == "x86_64": + # On 64-bit x86, enable SSE 4.2 and prior instruction sets (SSE3/SSSE3/SSE4/SSE4.1) to improve performance. + # This is supported on most CPUs released after 2009-2011 (Intel Nehalem, AMD Bulldozer). + # AVX and AVX2 aren't enabled because they aren't available on some low-end Intel CPUs, even fairly recent ones. + if env.msvc and not methods.using_clang(env): + # https://stackoverflow.com/questions/64053597/how-do-i-enable-sse4-1-and-sse3-but-not-avx-in-msvc/69328426 + env.Append(CCFLAGS=["/d2archSSE42"]) + else: + # `-msse2` is implied when compiling for x86_64. + env.Append(CCFLAGS=["-msse4.2"]) +elif env["arch"] == "x86_32": + # Be more conservative with instruction sets on 32-bit x86 to improve compatibility. + # SSE and SSE2 are present on all CPUs that support 64-bit, even if running a 32-bit OS. + if env.msvc and not methods.using_clang(env): env.Append(CCFLAGS=["/arch:SSE2"]) else: + # Use `-mfpmath=sse` to use SSE for floating-point math, which is more stable than x87. + # `-mstackrealign` is needed for it to work. env.Append(CCFLAGS=["-msse2", "-mfpmath=sse", "-mstackrealign"]) # Explicitly specify colored output. 
diff --git a/modules/raycast/SCsub b/modules/raycast/SCsub index 64a5100c778..28a81e3e5d7 100644 --- a/modules/raycast/SCsub +++ b/modules/raycast/SCsub @@ -80,9 +80,16 @@ if env["builtin_embree"]: env_thirdparty.disable_warnings() env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources) + # Set x86 CPU instruction sets to use when building Embree's own intrinsics. + # Keep this in sync with Godot's main SConstruct file. + # On x86_64, this is only needed on MSVC, as GCC/Clang will set those defines automatically + # according to compiler instruction set flags. if env["arch"] != "x86_64" or env.msvc: - # Embree needs those, it will automatically use SSE2NEON in ARM - env_thirdparty.Append(CPPDEFINES=["__SSE2__", "__SSE__"]) + # Embree needs those; it will automatically use SSE2NEON on ARM. + env_thirdparty.Append(CPPDEFINES=["__SSE__", "__SSE2__"]) + + if env["arch"] == "x86_64" and env.msvc: + env_thirdparty.Append(CPPDEFINES=["__SSE3__", "__SSSE3__", "__SSE4_1__", "__SSE4_2__"]) if env["platform"] == "web": env_thirdparty.Append(CXXFLAGS=["-msimd128"])