OpenMPT: Enable SSE2 and ARM NEON optimizations for a slight improvement in performance
parent
c2585f5567
commit
7da3324e32
|
@ -183,12 +183,12 @@
|
|||
//#define MPT_WITH_MEDIAFOUNDATION
|
||||
//#define MPT_WITH_MINIMP3
|
||||
//#define MPT_WITH_MINIZ
|
||||
#define MPT_WITH_MPG123
|
||||
#define MPT_WITH_OGG
|
||||
//#define MPT_WITH_MPG123
|
||||
//#define MPT_WITH_OGG
|
||||
//#define MPT_WITH_STBVORBIS
|
||||
#define MPT_WITH_VORBIS
|
||||
#define MPT_WITH_VORBISFILE
|
||||
#define MPT_WITH_ZLIB
|
||||
//#define MPT_WITH_VORBIS
|
||||
//#define MPT_WITH_VORBISFILE
|
||||
//#define MPT_WITH_ZLIB
|
||||
|
||||
#endif // LIBOPENMPT_BUILD
|
||||
|
||||
|
@ -405,10 +405,6 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if !MPT_COMPILER_MSVC && defined(ENABLE_ASM)
|
||||
#undef ENABLE_ASM // inline assembly requires MSVC compiler
|
||||
#endif
|
||||
|
||||
#if defined(ENABLE_ASM)
|
||||
#if MPT_COMPILER_MSVC && defined(_M_IX86)
|
||||
|
||||
|
@ -448,6 +444,24 @@
|
|||
// Generate AVX2 instructions (only used when the CPU supports it).
|
||||
#define ENABLE_AVX2
|
||||
|
||||
#elif MPT_BUILD_XCODE && defined(__x86_64__)
|
||||
|
||||
// No CPUID enabled, only one code path supported anyway
|
||||
// #define ENABLE_CPUID
|
||||
// Enable the SSE2 intrinsic functions unconditionally
|
||||
#define ENABLE_SSE2
|
||||
|
||||
#elif MPT_BUILD_XCODE && defined(__arm64__)
|
||||
|
||||
// No CPUID: runtime feature detection is awkward on ARM, so it stays disabled
|
||||
// #define ENABLE_CPUID
|
||||
// Enable the NEON intrinsic functions unconditionally
|
||||
#define ENABLE_NEON
|
||||
|
||||
#else
|
||||
|
||||
#undef ENABLE_ASM
|
||||
|
||||
#endif // arch
|
||||
#endif // ENABLE_ASM
|
||||
|
||||
|
|
|
@ -218,6 +218,54 @@ void InitProcSupport()
|
|||
}
|
||||
|
||||
|
||||
#elif MPT_BUILD_XCODE && defined(__x86_64__)
|
||||
|
||||
|
||||
// InitProcSupport variant selected when MPT_BUILD_XCODE is set and __x86_64__ is defined.
// It issues no CPUID query: every cached processor identification field is cleared,
// and the ASM_INTRIN and SSE2 capability flags are set unconditionally.
void InitProcSupport()
|
||||
{
|
||||

||||
// Reset both flag sets and all identification fields to empty / zero.
RealProcSupport = 0;
|
||||
ProcSupport = 0;
|
||||
mpt::String::WriteAutoBuf(ProcVendorID) = "";
|
||||
mpt::String::WriteAutoBuf(ProcBrandID) = "";
|
||||
ProcRawCPUID = 0;
|
||||
ProcFamily = 0;
|
||||
ProcModel = 0;
|
||||
ProcStepping = 0;
|
||||

||||
// Flags are assigned without any runtime probing in this branch.
ProcSupport |= PROCSUPPORT_ASM_INTRIN;
|
||||

||||
ProcSupport |= PROCSUPPORT_SSE2;
|
||||

||||
// Mirror the final flag set into RealProcSupport.
RealProcSupport = ProcSupport;
|
||||

||||
}
|
||||
|
||||
|
||||
#elif MPT_BUILD_XCODE && defined(__arm64__)
|
||||
|
||||
|
||||
// InitProcSupport variant selected when MPT_BUILD_XCODE is set and __arm64__ is defined.
// It performs no processor query: every cached processor identification field is cleared,
// and the ASM_INTRIN and NEON capability flags are set unconditionally.
void InitProcSupport()
|
||||
{
|
||||

||||
// Reset both flag sets and all identification fields to empty / zero.
RealProcSupport = 0;
|
||||
ProcSupport = 0;
|
||||
mpt::String::WriteAutoBuf(ProcVendorID) = "";
|
||||
mpt::String::WriteAutoBuf(ProcBrandID) = "";
|
||||
ProcRawCPUID = 0;
|
||||
ProcFamily = 0;
|
||||
ProcModel = 0;
|
||||
ProcStepping = 0;
|
||||

||||
// Flags are assigned without any runtime probing in this branch.
ProcSupport |= PROCSUPPORT_ASM_INTRIN;
|
||||

||||
ProcSupport |= PROCSUPPORT_NEON;
|
||||

||||
// Mirror the final flag set into RealProcSupport.
RealProcSupport = ProcSupport;
|
||||

||||
}
|
||||
|
||||
|
||||
#else // !( MPT_COMPILER_MSVC && ENABLE_X86 )
|
||||
|
||||
|
||||
|
@ -229,6 +277,16 @@ void InitProcSupport()
|
|||
|
||||
#endif // MPT_COMPILER_MSVC && ENABLE_X86
|
||||
|
||||
#ifndef MODPLUG_TRACKER
|
||||
// Helper type with a single static instance, doInitProcSupport. Its constructor
// calls InitProcSupport(), so the processor flags are filled in when that
// instance is constructed. Compiled only when MODPLUG_TRACKER is not defined.
static struct initProcSupport
|
||||
{
|
||||
// Constructor: its only action is to run InitProcSupport().
initProcSupport()
|
||||
{
|
||||
InitProcSupport();
|
||||
}
|
||||
} doInitProcSupport;
|
||||
#endif
|
||||
|
||||
#endif // ENABLE_ASM
|
||||
|
||||
|
||||
|
|
|
@ -15,8 +15,7 @@
|
|||
|
||||
OPENMPT_NAMESPACE_BEGIN
|
||||
|
||||
|
||||
#ifdef MODPLUG_TRACKER
|
||||
#ifdef ENABLE_ASM
|
||||
|
||||
#define PROCSUPPORT_ASM_INTRIN 0x00001 // assembly and intrinsics are enabled at runtime
|
||||
#define PROCSUPPORT_CPUID 0x00002 // Processor supports modern cpuid
|
||||
|
@ -31,15 +30,16 @@ OPENMPT_NAMESPACE_BEGIN
|
|||
#define PROCSUPPORT_AVX 0x10000 // Processor supports AVX instructions
|
||||
#define PROCSUPPORT_AVX2 0x20000 // Processor supports AVX2 instructions
|
||||
|
||||
#define PROCSUPPORT_NEON 0x40000 // Processor supports NEON instructions
|
||||
|
||||
static constexpr uint32 PROCSUPPORT_i586 = 0u ;
|
||||
static constexpr uint32 PROCSUPPORT_x86_SSE = 0u | PROCSUPPORT_SSE ;
|
||||
static constexpr uint32 PROCSUPPORT_x86_SSE2 = 0u | PROCSUPPORT_SSE | PROCSUPPORT_SSE2 ;
|
||||
static constexpr uint32 PROCSUPPORT_AMD64 = 0u | PROCSUPPORT_SSE | PROCSUPPORT_SSE2 | PROCSUPPORT_LM;
|
||||
|
||||
#endif
|
||||
static constexpr uint32 PROCSUPPORT_ARM64 = 0u | PROCSUPPORT_NEON ;
|
||||
|
||||
|
||||
#ifdef ENABLE_ASM
|
||||
|
||||
extern uint32 RealProcSupport;
|
||||
extern uint32 ProcSupport;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -19,6 +19,10 @@
|
|||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_NEON
|
||||
#include "../common/sse2neon.h"
|
||||
#endif
|
||||
|
||||
#endif // NO_REVERB
|
||||
|
||||
|
||||
|
@ -28,7 +32,7 @@ OPENMPT_NAMESPACE_BEGIN
|
|||
#ifndef NO_REVERB
|
||||
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#if defined(ENABLE_SSE2) || defined(ENABLE_NEON)
|
||||
// Load two 32-bit values
|
||||
static MPT_FORCEINLINE __m128i Load64SSE(const int32 *x) { return _mm_loadl_epi64(reinterpret_cast<const __m128i *>(x)); }
|
||||
// Load four 16-bit values
|
||||
|
@ -594,6 +598,11 @@ void CReverb::ReverbProcessPostFiltering1x(const int32 * MPT_RESTRICT pRvb, int3
|
|||
{
|
||||
#ifdef ENABLE_SSE2
|
||||
if(GetProcSupport() & PROCSUPPORT_SSE2)
|
||||
#endif
|
||||
#ifdef ENABLE_NEON
|
||||
if(GetProcSupport() & PROCSUPPORT_NEON)
|
||||
#endif
|
||||
#if defined(ENABLE_SSE2) || defined(ENABLE_NEON)
|
||||
{
|
||||
__m128i nDCRRvb_Y1 = Load64SSE(gnDCRRvb_Y1);
|
||||
__m128i nDCRRvb_X1 = Load64SSE(gnDCRRvb_X1);
|
||||
|
@ -656,6 +665,11 @@ void CReverb::ReverbDCRemoval(int32 * MPT_RESTRICT pBuffer, uint32 nSamples)
|
|||
{
|
||||
#ifdef ENABLE_SSE2
|
||||
if(GetProcSupport() & PROCSUPPORT_SSE2)
|
||||
#endif
|
||||
#ifdef ENABLE_NEON
|
||||
if(GetProcSupport() & PROCSUPPORT_NEON)
|
||||
#endif
|
||||
#if defined(ENABLE_SSE2) || defined(ENABLE_NEON)
|
||||
{
|
||||
__m128i nDCRRvb_Y1 = Load64SSE(gnDCRRvb_Y1);
|
||||
__m128i nDCRRvb_X1 = Load64SSE(gnDCRRvb_X1);
|
||||
|
@ -721,6 +735,11 @@ void CReverb::ProcessPreDelay(SWRvbRefDelay * MPT_RESTRICT pPreDelay, const int3
|
|||
uint32 delayPos = pPreDelay->nDelayPos - 1;
|
||||
#ifdef ENABLE_SSE2
|
||||
if(GetProcSupport() & PROCSUPPORT_SSE2)
|
||||
#endif
|
||||
#ifdef ENABLE_NEON
|
||||
if(GetProcSupport() & PROCSUPPORT_NEON)
|
||||
#endif
|
||||
#if defined(ENABLE_SSE2) || defined(ENABLE_NEON)
|
||||
{
|
||||
__m128i coeffs = _mm_cvtsi32_si128(pPreDelay->nCoeffs.lr);
|
||||
__m128i history = _mm_cvtsi32_si128(pPreDelay->History.lr);
|
||||
|
@ -793,6 +812,11 @@ void CReverb::ProcessReflections(SWRvbRefDelay * MPT_RESTRICT pPreDelay, LR16 *
|
|||
{
|
||||
#ifdef ENABLE_SSE2
|
||||
if(GetProcSupport() & PROCSUPPORT_SSE2)
|
||||
#endif
|
||||
#ifdef ENABLE_NEON
|
||||
if(GetProcSupport() & PROCSUPPORT_NEON)
|
||||
#endif
|
||||
#if defined(ENABLE_SSE2) || defined(ENABLE_NEON)
|
||||
{
|
||||
union
|
||||
{
|
||||
|
@ -888,6 +912,11 @@ void CReverb::ProcessLateReverb(SWLateReverb * MPT_RESTRICT pReverb, LR16 * MPT_
|
|||
|
||||
#ifdef ENABLE_SSE2
|
||||
if(GetProcSupport() & PROCSUPPORT_SSE2)
|
||||
#endif
|
||||
#ifdef ENABLE_NEON
|
||||
if(GetProcSupport() & PROCSUPPORT_NEON)
|
||||
#endif
|
||||
#if defined(ENABLE_SSE2) || defined(ENABLE_NEON)
|
||||
{
|
||||
int delayPos = pReverb->nDelayPos & RVBDLY_MASK;
|
||||
__m128i rvbOutGains = Load64SSE(pReverb->RvbOutGains);
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
#ifdef ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#ifdef ENABLE_NEON
|
||||
#include "../common/sse2neon.h"
|
||||
#endif
|
||||
|
||||
|
||||
OPENMPT_NAMESPACE_BEGIN
|
||||
|
@ -25,7 +28,7 @@ OPENMPT_NAMESPACE_BEGIN
|
|||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// SSE2 Optimizations (also compiled for ARM NEON through sse2neon.h)
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#if defined(ENABLE_SSE2) || defined(ENABLE_NEON)
|
||||
|
||||
static void SSE2_StereoMixToFloat(const int32 *pSrc, float *pOut1, float *pOut2, uint32 nCount, const float _i2fc)
|
||||
{
|
||||
|
@ -182,12 +185,17 @@ void StereoMixToFloat(const int32 *pSrc, float *pOut1, float *pOut2, uint32 nCou
|
|||
|
||||
#ifdef ENABLE_SSE2
|
||||
if(GetProcSupport() & PROCSUPPORT_SSE2)
|
||||
#endif
|
||||
#ifdef ENABLE_NEON
|
||||
if(GetProcSupport() & PROCSUPPORT_NEON)
|
||||
#endif
|
||||
#if defined(ENABLE_SSE2) || defined(ENABLE_NEON)
|
||||
{
|
||||
SSE2_StereoMixToFloat(pSrc, pOut1, pOut2, nCount, _i2fc);
|
||||
return;
|
||||
}
|
||||
#endif // ENABLE_SSE2
|
||||
|
||||
#endif // ENABLE_SSE2 || ENABLE_NEON
|
||||
|
||||
{
|
||||
C_StereoMixToFloat(pSrc, pOut1, pOut2, nCount, _i2fc);
|
||||
}
|
||||
|
@ -199,12 +207,17 @@ void FloatToStereoMix(const float *pIn1, const float *pIn2, int32 *pOut, uint32
|
|||
{
|
||||
#ifdef ENABLE_SSE2
|
||||
if(GetProcSupport() & PROCSUPPORT_SSE2)
|
||||
#endif
|
||||
#ifdef ENABLE_NEON
|
||||
if(GetProcSupport() & PROCSUPPORT_NEON)
|
||||
#endif
|
||||
#if defined(ENABLE_SSE2) || defined(ENABLE_NEON)
|
||||
{
|
||||
SSE2_FloatToStereoMix(pIn1, pIn2, pOut, nCount, _f2ic);
|
||||
return;
|
||||
}
|
||||
#endif // ENABLE_SSE2
|
||||
|
||||
#endif // ENABLE_SSE2 || ENABLE_NEON
|
||||
|
||||
{
|
||||
C_FloatToStereoMix(pIn1, pIn2, pOut, nCount, _f2ic);
|
||||
}
|
||||
|
|
|
@ -13,5 +13,8 @@
|
|||
|
||||
#define MPT_WITH_ZLIB 1
|
||||
|
||||
#define MPT_BUILD_XCODE 1
|
||||
#define ENABLE_ASM 1
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -87,7 +87,6 @@
|
|||
83E5FC951FFEFA0D00659F0F /* mptOS.h in Headers */ = {isa = PBXBuildFile; fileRef = 83E5FC5C1FFEFA0D00659F0F /* mptOS.h */; };
|
||||
83E5FC961FFEFA0D00659F0F /* mptIO.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 83E5FC5D1FFEFA0D00659F0F /* mptIO.cpp */; };
|
||||
83E5FC971FFEFA0D00659F0F /* mptCPU.h in Headers */ = {isa = PBXBuildFile; fileRef = 83E5FC5E1FFEFA0D00659F0F /* mptCPU.h */; };
|
||||
83E5FC981FFEFA0D00659F0F /* mptBufferIO.h in Headers */ = {isa = PBXBuildFile; fileRef = 83E5FC5F1FFEFA0D00659F0F /* mptBufferIO.h */; };
|
||||
83E5FC991FFEFA0D00659F0F /* versionNumber.h in Headers */ = {isa = PBXBuildFile; fileRef = 83E5FC601FFEFA0D00659F0F /* versionNumber.h */; };
|
||||
83E5FC9A1FFEFA0D00659F0F /* misc_util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 83E5FC611FFEFA0D00659F0F /* misc_util.cpp */; };
|
||||
83E5FCCB1FFEFA1A00659F0F /* libopenmpt_impl.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 83E5FC9C1FFEFA1A00659F0F /* libopenmpt_impl.hpp */; };
|
||||
|
@ -370,7 +369,6 @@
|
|||
83E5FC5C1FFEFA0D00659F0F /* mptOS.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mptOS.h; sourceTree = "<group>"; };
|
||||
83E5FC5D1FFEFA0D00659F0F /* mptIO.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mptIO.cpp; sourceTree = "<group>"; };
|
||||
83E5FC5E1FFEFA0D00659F0F /* mptCPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mptCPU.h; sourceTree = "<group>"; };
|
||||
83E5FC5F1FFEFA0D00659F0F /* mptBufferIO.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mptBufferIO.h; sourceTree = "<group>"; };
|
||||
83E5FC601FFEFA0D00659F0F /* versionNumber.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = versionNumber.h; sourceTree = "<group>"; };
|
||||
83E5FC611FFEFA0D00659F0F /* misc_util.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = misc_util.cpp; sourceTree = "<group>"; };
|
||||
83E5FC9C1FFEFA1A00659F0F /* libopenmpt_impl.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = libopenmpt_impl.hpp; sourceTree = "<group>"; };
|
||||
|
@ -665,7 +663,6 @@
|
|||
831132CB21F955B0001F678F /* mptBaseMacros.h */,
|
||||
831132CE21F955B1001F678F /* mptBaseTypes.h */,
|
||||
831132D121F955B1001F678F /* mptBaseUtils.h */,
|
||||
83E5FC5F1FFEFA0D00659F0F /* mptBufferIO.h */,
|
||||
83E5FC3D1FFEFA0D00659F0F /* mptCPU.cpp */,
|
||||
83E5FC5E1FFEFA0D00659F0F /* mptCPU.h */,
|
||||
83E5FC2A1FFEFA0D00659F0F /* mptCRC.h */,
|
||||
|
@ -1050,7 +1047,6 @@
|
|||
83E5FE121FFEFA8500659F0F /* XMTools.h in Headers */,
|
||||
83E5FCCB1FFEFA1A00659F0F /* libopenmpt_impl.hpp in Headers */,
|
||||
831132E821F9565F001F678F /* BitReader.h in Headers */,
|
||||
83E5FC981FFEFA0D00659F0F /* mptBufferIO.h in Headers */,
|
||||
83E5FC741FFEFA0D00659F0F /* BuildSettings.h in Headers */,
|
||||
831132D521F955B2001F678F /* mptMemory.h in Headers */,
|
||||
831132DE21F955B2001F678F /* mptException.h in Headers */,
|
||||
|
|
Loading…
Reference in New Issue