Per-function target attribute on clang and GCC. (#152)

This commit is contained in:
Tom Lally 2022-09-02 18:10:41 +01:00 committed by GitHub
parent 68fa5b32a1
commit f5972dfbb0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 29 deletions

View File

@ -1,9 +1,5 @@
project(CemuCafe) project(CemuCafe)
if(CMAKE_C_COMPILER_ID STREQUAL "GNU")
add_compile_options(-mssse3 -mavx2)
endif()
file(GLOB_RECURSE CPP_FILES *.cpp) file(GLOB_RECURSE CPP_FILES *.cpp)
file(GLOB_RECURSE H_FILES *.h) file(GLOB_RECURSE H_FILES *.h)

View File

@ -3,10 +3,18 @@
#include "Cafe/HW/Latte/ISA/RegDefines.h" #include "Cafe/HW/Latte/ISA/RegDefines.h"
#if BOOST_OS_LINUX #if __GNUC__
#include <immintrin.h> #include <immintrin.h>
#endif #endif
// Per-function instruction-set annotations.
// ATTRIBUTE_AVX2 / ATTRIBUTE_SSE41 are written directly in front of a function
// definition. When __GNUC__ is defined (the commit title names clang and GCC as
// the intended compilers) they expand to a target attribute that enables
// "avx2" or "sse4.1" for that single function.
#ifdef __GNUC__
#define ATTRIBUTE_AVX2 __attribute__((target("avx2")))
#define ATTRIBUTE_SSE41 __attribute__((target("sse4.1")))
#else
// Without __GNUC__ the macros expand to nothing, so annotated functions are
// compiled as ordinary functions.
// NOTE(review): presumably such compilers (e.g. MSVC) accept the intrinsics
// without a per-function attribute - confirm.
#define ATTRIBUTE_AVX2
#define ATTRIBUTE_SSE41
#endif
struct struct
{ {
const void* lastPtr; const void* lastPtr;
@ -284,10 +292,7 @@ void LatteIndices_generateAutoLineLoopIndices(void* indexDataOutput, uint32 coun
indexMax = std::max(count, 1u) - 1; indexMax = std::max(count, 1u) - 1;
} }
#if BOOST_OS_LINUX || BOOST_OS_MACOS ATTRIBUTE_AVX2
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
#endif
void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{ {
// using AVX + AVX2 we can process 16 indices at a time // using AVX + AVX2 we can process 16 indices at a time
@ -352,14 +357,7 @@ void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDat
indexMin = std::min(indexMin, _minIndex); indexMin = std::min(indexMin, _minIndex);
} }
#if BOOST_OS_LINUX || BOOST_OS_MACOS ATTRIBUTE_SSE41
#pragma clang attribute pop
#endif
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
#endif
void LatteIndices_fastConvertU16_SSE41(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) void LatteIndices_fastConvertU16_SSE41(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{ {
// SSSE3 & SSE4.1 optimized decoding // SSSE3 & SSE4.1 optimized decoding
@ -423,14 +421,7 @@ void LatteIndices_fastConvertU16_SSE41(const void* indexDataInput, void* indexDa
indexMin = std::min(indexMin, _minIndex); indexMin = std::min(indexMin, _minIndex);
} }
#if BOOST_OS_LINUX || BOOST_OS_MACOS ATTRIBUTE_AVX2
#pragma clang attribute pop
#endif
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
#endif
void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{ {
// using AVX + AVX2 we can process 8 indices at a time // using AVX + AVX2 we can process 8 indices at a time
@ -497,10 +488,6 @@ void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDat
indexMin = std::min(indexMin, _minIndex); indexMin = std::min(indexMin, _minIndex);
} }
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute pop
#endif
template<typename T> template<typename T>
void _LatteIndices_alternativeCalculateIndexMinMax(const void* indexData, uint32 count, uint32 primitiveRestartIndex, uint32& indexMin, uint32& indexMax) void _LatteIndices_alternativeCalculateIndexMinMax(const void* indexData, uint32 count, uint32 primitiveRestartIndex, uint32& indexMin, uint32& indexMax)
{ {