From f5972dfbb0c3fafadaba85ad2ddd8e8fec655ee3 Mon Sep 17 00:00:00 2001 From: Tom Lally Date: Fri, 2 Sep 2022 18:10:41 +0100 Subject: [PATCH] Per-function target attribute on clang and GCC. (#152) --- src/Cafe/CMakeLists.txt | 4 --- src/Cafe/HW/Latte/Core/LatteIndices.cpp | 37 ++++++++----------------- 2 files changed, 12 insertions(+), 29 deletions(-) diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index c27708f3..776aef59 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -1,9 +1,5 @@ project(CemuCafe) -if(CMAKE_C_COMPILER_ID STREQUAL "GNU") - add_compile_options(-mssse3 -mavx2) -endif() - file(GLOB_RECURSE CPP_FILES *.cpp) file(GLOB_RECURSE H_FILES *.h) diff --git a/src/Cafe/HW/Latte/Core/LatteIndices.cpp b/src/Cafe/HW/Latte/Core/LatteIndices.cpp index 7edaad00..e5f3364a 100644 --- a/src/Cafe/HW/Latte/Core/LatteIndices.cpp +++ b/src/Cafe/HW/Latte/Core/LatteIndices.cpp @@ -3,10 +3,18 @@ #include "Cafe/HW/Latte/ISA/RegDefines.h" -#if BOOST_OS_LINUX +#if __GNUC__ #include #endif +#ifdef __GNUC__ +#define ATTRIBUTE_AVX2 __attribute__((target("avx2"))) +#define ATTRIBUTE_SSE41 __attribute__((target("sse4.1"))) +#else +#define ATTRIBUTE_AVX2 +#define ATTRIBUTE_SSE41 +#endif + struct { const void* lastPtr; @@ -284,10 +292,7 @@ void LatteIndices_generateAutoLineLoopIndices(void* indexDataOutput, uint32 coun indexMax = std::max(count, 1u) - 1; } -#if BOOST_OS_LINUX || BOOST_OS_MACOS -#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) -#endif - +ATTRIBUTE_AVX2 void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) { // using AVX + AVX2 we can process 16 indices at a time @@ -352,14 +357,7 @@ void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDat indexMin = std::min(indexMin, _minIndex); } -#if BOOST_OS_LINUX || BOOST_OS_MACOS -#pragma clang attribute pop -#endif - -#if BOOST_OS_LINUX || BOOST_OS_MACOS -#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) -#endif - +ATTRIBUTE_SSE41 void LatteIndices_fastConvertU16_SSE41(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) { // SSSE3 & SSE4.1 optimized decoding @@ -423,14 +421,7 @@ void LatteIndices_fastConvertU16_SSE41(const void* indexDataInput, void* indexDa indexMin = std::min(indexMin, _minIndex); } -#if BOOST_OS_LINUX || BOOST_OS_MACOS -#pragma clang attribute pop -#endif - -#if BOOST_OS_LINUX || BOOST_OS_MACOS -#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) -#endif - +ATTRIBUTE_AVX2 void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) { // using AVX + AVX2 we can process 8 indices at a time @@ -497,10 +488,6 @@ void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDat indexMin = std::min(indexMin, _minIndex); } -#if BOOST_OS_LINUX || BOOST_OS_MACOS -#pragma clang attribute pop -#endif - template void _LatteIndices_alternativeCalculateIndexMinMax(const void* indexData, uint32 count, uint32 primitiveRestartIndex, uint32& indexMin, uint32& indexMax) {