mirror of https://github.com/cemu-project/Cemu.git
Per-function target attribute on clang and GCC. (#152)
This commit is contained in:
parent
68fa5b32a1
commit
f5972dfbb0
|
@ -1,9 +1,5 @@
|
||||||
project(CemuCafe)
|
project(CemuCafe)
|
||||||
|
|
||||||
if(CMAKE_C_COMPILER_ID STREQUAL "GNU")
|
|
||||||
add_compile_options(-mssse3 -mavx2)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
file(GLOB_RECURSE CPP_FILES *.cpp)
|
file(GLOB_RECURSE CPP_FILES *.cpp)
|
||||||
file(GLOB_RECURSE H_FILES *.h)
|
file(GLOB_RECURSE H_FILES *.h)
|
||||||
|
|
||||||
|
|
|
@ -3,10 +3,18 @@
|
||||||
|
|
||||||
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
||||||
|
|
||||||
#if BOOST_OS_LINUX
|
#if __GNUC__
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Per-function instruction-set selection macros.
// When __GNUC__ is defined, each macro expands to a
// __attribute__((target(...))) specifier that is placed directly in front of a
// function definition, so the named instruction set is requested for that one
// function instead of through file-wide compiler flags.
#ifdef __GNUC__
|
||||||
|
// Requests the "avx2" target for the annotated function.
#define ATTRIBUTE_AVX2 __attribute__((target("avx2")))
|
||||||
|
// Requests the "sse4.1" target for the annotated function.
#define ATTRIBUTE_SSE41 __attribute__((target("sse4.1")))
|
||||||
|
#else
|
||||||
|
// Fallback when __GNUC__ is not defined: both macros expand to nothing, so the
// annotated functions are compiled with whatever the build's default settings are.
#define ATTRIBUTE_AVX2
|
||||||
|
#define ATTRIBUTE_SSE41
|
||||||
|
#endif
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
const void* lastPtr;
|
const void* lastPtr;
|
||||||
|
@ -284,10 +292,7 @@ void LatteIndices_generateAutoLineLoopIndices(void* indexDataOutput, uint32 coun
|
||||||
indexMax = std::max(count, 1u) - 1;
|
indexMax = std::max(count, 1u) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if BOOST_OS_LINUX || BOOST_OS_MACOS
|
ATTRIBUTE_AVX2
|
||||||
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
|
void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
|
||||||
{
|
{
|
||||||
// using AVX + AVX2 we can process 16 indices at a time
|
// using AVX + AVX2 we can process 16 indices at a time
|
||||||
|
@ -352,14 +357,7 @@ void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDat
|
||||||
indexMin = std::min(indexMin, _minIndex);
|
indexMin = std::min(indexMin, _minIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if BOOST_OS_LINUX || BOOST_OS_MACOS
|
ATTRIBUTE_SSE41
|
||||||
#pragma clang attribute pop
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if BOOST_OS_LINUX || BOOST_OS_MACOS
|
|
||||||
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void LatteIndices_fastConvertU16_SSE41(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
|
void LatteIndices_fastConvertU16_SSE41(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
|
||||||
{
|
{
|
||||||
// SSSE3 & SSE4.1 optimized decoding
|
// SSSE3 & SSE4.1 optimized decoding
|
||||||
|
@ -423,14 +421,7 @@ void LatteIndices_fastConvertU16_SSE41(const void* indexDataInput, void* indexDa
|
||||||
indexMin = std::min(indexMin, _minIndex);
|
indexMin = std::min(indexMin, _minIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if BOOST_OS_LINUX || BOOST_OS_MACOS
|
ATTRIBUTE_AVX2
|
||||||
#pragma clang attribute pop
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if BOOST_OS_LINUX || BOOST_OS_MACOS
|
|
||||||
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
|
void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
|
||||||
{
|
{
|
||||||
// using AVX + AVX2 we can process 8 indices at a time
|
// using AVX + AVX2 we can process 8 indices at a time
|
||||||
|
@ -497,10 +488,6 @@ void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDat
|
||||||
indexMin = std::min(indexMin, _minIndex);
|
indexMin = std::min(indexMin, _minIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if BOOST_OS_LINUX || BOOST_OS_MACOS
|
|
||||||
#pragma clang attribute pop
|
|
||||||
#endif
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void _LatteIndices_alternativeCalculateIndexMinMax(const void* indexData, uint32 count, uint32 primitiveRestartIndex, uint32& indexMin, uint32& indexMax)
|
void _LatteIndices_alternativeCalculateIndexMinMax(const void* indexData, uint32 count, uint32 primitiveRestartIndex, uint32& indexMin, uint32& indexMax)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue