mirror of
https://github.com/libsdl-org/SDL.git
synced 2026-06-04 19:55:19 +00:00
Add SVE2 SIMD Alpha-Blending Blitter (#15504)
SVE/SVE2 is a new SIMD extension for AArch64. Compared to NEON, SVE/SVE2 brings the following benefits that are good for SDL projects: - Lane prediction: we don't have to treat the tail part of a stride separately when the width is n times the hardware vector size - Although the performance is almost no difference from NEON when the hardware vector size is 128bits, when the hardware provides a longer vector size, e.g. 256, 512, ... 2048, we can enjoy the large performance gain without modifying the source code or recompiling a library. The functional correctness is validated in a dedicated [qemu project](https://github.com/GorgonMeducer/aarch64_qemu_mac_template/tree/SDL-SVE2-Acceleration-Validation). The performance is tested on [Radxa Orion 6 N](https://radxa.com/products/orion/o6n/), which provides 4x A720 and 4x A520 processors. Since the vector size is 128 bits, which is the same as NEON, the performance is almost the same (or no worse than) the NEON acceleration.
This commit is contained in:
parent
0e5e772ba9
commit
0f175891a6
21 changed files with 4116 additions and 0 deletions
|
|
@ -84,6 +84,7 @@ LOCAL_SRC_FILES := \
|
||||||
$(wildcard $(LOCAL_PATH)/src/tray/*.c) \
|
$(wildcard $(LOCAL_PATH)/src/tray/*.c) \
|
||||||
$(wildcard $(LOCAL_PATH)/src/video/*.c) \
|
$(wildcard $(LOCAL_PATH)/src/video/*.c) \
|
||||||
$(wildcard $(LOCAL_PATH)/src/video/android/*.c) \
|
$(wildcard $(LOCAL_PATH)/src/video/android/*.c) \
|
||||||
|
$(wildcard $(LOCAL_PATH)/src/video/arm/*.c) \
|
||||||
$(wildcard $(LOCAL_PATH)/src/video/yuv2rgb/*.c))
|
$(wildcard $(LOCAL_PATH)/src/video/yuv2rgb/*.c))
|
||||||
|
|
||||||
LOCAL_CFLAGS += -DGL_GLEXT_PROTOTYPES
|
LOCAL_CFLAGS += -DGL_GLEXT_PROTOTYPES
|
||||||
|
|
|
||||||
|
|
@ -314,6 +314,7 @@ dep_option(SDL_SSE4_2 "Use SSE4.2 assembly routines" ON "SDL_ASSEMB
|
||||||
dep_option(SDL_MMX "Use MMX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
dep_option(SDL_MMX "Use MMX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
dep_option(SDL_ALTIVEC "Use Altivec assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_POWERPC32 OR SDL_CPU_POWERPC64" OFF)
|
dep_option(SDL_ALTIVEC "Use Altivec assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_POWERPC32 OR SDL_CPU_POWERPC64" OFF)
|
||||||
dep_option(SDL_ARMNEON "Use NEON assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_ARM32 OR SDL_CPU_ARM64" OFF)
|
dep_option(SDL_ARMNEON "Use NEON assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_ARM32 OR SDL_CPU_ARM64" OFF)
|
||||||
|
dep_option(SDL_ARMSVE2 "Use SVE2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_ARM64" OFF)
|
||||||
dep_option(SDL_LSX "Use LSX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_LOONGARCH64" OFF)
|
dep_option(SDL_LSX "Use LSX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_LOONGARCH64" OFF)
|
||||||
dep_option(SDL_LASX "Use LASX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_LOONGARCH64" OFF)
|
dep_option(SDL_LASX "Use LASX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_LOONGARCH64" OFF)
|
||||||
|
|
||||||
|
|
@ -939,6 +940,37 @@ if(SDL_ASSEMBLY)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(SDL_ARMSVE2)
|
||||||
|
cmake_push_check_state()
|
||||||
|
string(APPEND CMAKE_REQUIRED_FLAGS " -march=armv8-a+sve2")
|
||||||
|
check_arm_source_compiles([==[
|
||||||
|
#include <arm_sve.h>
|
||||||
|
svuint32_t sve2_test(svuint32_t a, svuint32_t b) {
|
||||||
|
return svadd_u32_x(svptrue_b32(), a, b);
|
||||||
|
}
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
sve2_test(svdup_u32(0), svdup_u32(0));
|
||||||
|
return 0;
|
||||||
|
}]==] COMPILER_SUPPORTS_ARMSVE2)
|
||||||
|
if(COMPILER_SUPPORTS_ARMSVE2)
|
||||||
|
set(HAVE_ARMSVE2 TRUE)
|
||||||
|
endif()
|
||||||
|
cmake_pop_check_state()
|
||||||
|
|
||||||
|
if(HAVE_ARMSVE2)
|
||||||
|
sdl_sources(
|
||||||
|
"${SDL3_SOURCE_DIR}/src/video/arm/SDL_sve2_blit_A.c"
|
||||||
|
"${SDL3_SOURCE_DIR}/src/video/arm/SDL_sve2_blit_N.c"
|
||||||
|
)
|
||||||
|
set_source_files_properties(
|
||||||
|
"${SDL3_SOURCE_DIR}/src/video/arm/SDL_sve2_blit_A.c"
|
||||||
|
"${SDL3_SOURCE_DIR}/src/video/arm/SDL_sve2_blit_N.c"
|
||||||
|
PROPERTIES
|
||||||
|
SKIP_PRECOMPILE_HEADERS ON
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
if(USE_GCC OR USE_CLANG)
|
if(USE_GCC OR USE_CLANG)
|
||||||
# TODO: Those all seem to be quite GCC specific - needs to be
|
# TODO: Those all seem to be quite GCC specific - needs to be
|
||||||
# reworked for better compiler support
|
# reworked for better compiler support
|
||||||
|
|
@ -1055,6 +1087,10 @@ if(NOT HAVE_ARMNEON)
|
||||||
set(SDL_DISABLE_NEON 1)
|
set(SDL_DISABLE_NEON 1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(NOT HAVE_ARMSVE2)
|
||||||
|
set(SDL_DISABLE_SVE2 1)
|
||||||
|
endif()
|
||||||
|
|
||||||
set(SDL_DISABLE_ALLOCA 0)
|
set(SDL_DISABLE_ALLOCA 0)
|
||||||
check_include_file("alloca.h" "HAVE_ALLOCA_H")
|
check_include_file("alloca.h" "HAVE_ALLOCA_H")
|
||||||
if(MSVC)
|
if(MSVC)
|
||||||
|
|
|
||||||
|
|
@ -281,6 +281,18 @@ extern SDL_DECLSPEC bool SDLCALL SDL_HasARMSIMD(void);
|
||||||
*/
|
*/
|
||||||
extern SDL_DECLSPEC bool SDLCALL SDL_HasNEON(void);
|
extern SDL_DECLSPEC bool SDLCALL SDL_HasNEON(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determine whether the CPU has SVE2 (Scalable Vector Extension 2).
|
||||||
|
*
|
||||||
|
* This is only relevant on ARM64 Linux. On other platforms it always returns
|
||||||
|
* false.
|
||||||
|
*
|
||||||
|
* \returns true if the CPU has SVE2, false otherwise.
|
||||||
|
*
|
||||||
|
* \since This function is available since SDL 3.6.0.
|
||||||
|
*/
|
||||||
|
extern SDL_DECLSPEC bool SDLCALL SDL_HasSVE2(void);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determine whether the CPU has LSX (LOONGARCH SIMD) features.
|
* Determine whether the CPU has LSX (LOONGARCH SIMD) features.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -85,6 +85,16 @@
|
||||||
*/
|
*/
|
||||||
#define SDL_NEON_INTRINSICS 1
|
#define SDL_NEON_INTRINSICS 1
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Defined if (and only if) the compiler supports ARM SVE2 intrinsics.
|
||||||
|
*
|
||||||
|
* If this macro is defined, SDL will have already included `<arm_sve.h>`
|
||||||
|
* as appropriate.
|
||||||
|
*
|
||||||
|
* \since This macro is available since SDL 3.6.0.
|
||||||
|
*/
|
||||||
|
#define SDL_SVE2_INTRINSICS 1
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defined if (and only if) the compiler supports PowerPC Altivec intrinsics.
|
* Defined if (and only if) the compiler supports PowerPC Altivec intrinsics.
|
||||||
*
|
*
|
||||||
|
|
@ -237,6 +247,10 @@ _m_prefetch(void *__P)
|
||||||
# define SDL_NEON_INTRINSICS 1
|
# define SDL_NEON_INTRINSICS 1
|
||||||
# include <arm_neon.h>
|
# include <arm_neon.h>
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(__ARM_FEATURE_SVE2) && !defined(SDL_DISABLE_SVE2)
|
||||||
|
# define SDL_SVE2_INTRINSICS 1
|
||||||
|
# include <arm_sve.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#else
|
#else
|
||||||
/* altivec.h redefining bool causes a number of problems, see bugs 3993 and 4392, so you need to explicitly define SDL_ENABLE_ALTIVEC to have it included. */
|
/* altivec.h redefining bool causes a number of problems, see bugs 3993 and 4392, so you need to explicitly define SDL_ENABLE_ALTIVEC to have it included. */
|
||||||
|
|
@ -265,6 +279,20 @@ _m_prefetch(void *__P)
|
||||||
# endif
|
# endif
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
#ifndef SDL_DISABLE_SVE2
|
||||||
|
# if defined(SDL_PLATFORM_WINDOWS)
|
||||||
|
/* Visual Studio doesn't define __ARM_ARCH, but _M_ARM (if set, always 7), and _M_ARM64 (if set, always 1). */
|
||||||
|
# if defined (_M_ARM64) && 0 /* Please only remove this 0 when MSVC releasing support for SVE2 officially. */
|
||||||
|
# define SDL_SVE2_INTRINSICS 1
|
||||||
|
# include <arm_sve.h>
|
||||||
|
# define __ARM_FEATURE_SVE2 1 /* Set __ARM_FEATURE_SVE2 so that it can be used elsewhere, at compile time */
|
||||||
|
# define __ARM_ARCH 8
|
||||||
|
# endif
|
||||||
|
# elif !defined(SDL_PLATFORM_MACOS) /* Apple has no AArch64 device supporting SVE2 */
|
||||||
|
# define SDL_SVE2_INTRINSICS 1
|
||||||
|
# include <arm_sve.h>
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
#endif /* compiler version */
|
#endif /* compiler version */
|
||||||
|
|
||||||
#ifdef SDL_WIKI_DOCUMENTATION_SECTION
|
#ifdef SDL_WIKI_DOCUMENTATION_SECTION
|
||||||
|
|
|
||||||
|
|
@ -625,6 +625,7 @@ typedef unsigned int uintptr_t;
|
||||||
#cmakedefine SDL_DISABLE_LSX 1
|
#cmakedefine SDL_DISABLE_LSX 1
|
||||||
#cmakedefine SDL_DISABLE_LASX 1
|
#cmakedefine SDL_DISABLE_LASX 1
|
||||||
#cmakedefine SDL_DISABLE_NEON 1
|
#cmakedefine SDL_DISABLE_NEON 1
|
||||||
|
#cmakedefine SDL_DISABLE_SVE2 1
|
||||||
|
|
||||||
#ifdef SDL_PLATFORM_PRIVATE
|
#ifdef SDL_PLATFORM_PRIVATE
|
||||||
#include "SDL_end_config_private.h"
|
#include "SDL_end_config_private.h"
|
||||||
|
|
|
||||||
|
|
@ -226,4 +226,7 @@
|
||||||
/* Enable tray subsystem */
|
/* Enable tray subsystem */
|
||||||
#define SDL_TRAY_DUMMY 1
|
#define SDL_TRAY_DUMMY 1
|
||||||
|
|
||||||
|
/* Disable ARM SVE2 intrinsics until we confirm they're available on all Apple mobile and TV hardware */
|
||||||
|
#define SDL_DISABLE_SVE2 1
|
||||||
|
|
||||||
#endif /* SDL_build_config_ios_h_ */
|
#endif /* SDL_build_config_ios_h_ */
|
||||||
|
|
|
||||||
|
|
@ -109,6 +109,7 @@
|
||||||
#define CPU_HAS_ARM_SIMD (1 << 11)
|
#define CPU_HAS_ARM_SIMD (1 << 11)
|
||||||
#define CPU_HAS_LSX (1 << 12)
|
#define CPU_HAS_LSX (1 << 12)
|
||||||
#define CPU_HAS_LASX (1 << 13)
|
#define CPU_HAS_LASX (1 << 13)
|
||||||
|
#define CPU_HAS_SVE2 (1 << 14)
|
||||||
|
|
||||||
#define CPU_CFG2 0x2
|
#define CPU_CFG2 0x2
|
||||||
#define CPU_CFG2_LSX (1 << 6)
|
#define CPU_CFG2_LSX (1 << 6)
|
||||||
|
|
@ -514,6 +515,27 @@ static int CPU_haveNEON(void)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef AT_HWCAP2
|
||||||
|
#define AT_HWCAP2 26
|
||||||
|
#endif
|
||||||
|
#ifndef HWCAP_SVE
|
||||||
|
#define HWCAP_SVE (1 << 22)
|
||||||
|
#endif
|
||||||
|
#ifndef HWCAP2_SVE2
|
||||||
|
#define HWCAP2_SVE2 (1 << 1)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static int CPU_haveSVE2(void)
|
||||||
|
{
|
||||||
|
#if defined(__aarch64__) && \
|
||||||
|
((defined(SDL_PLATFORM_LINUX) && defined(HAVE_GETAUXVAL)) || defined(SDL_PLATFORM_ANDROID))
|
||||||
|
return ((getauxval(AT_HWCAP2) & HWCAP2_SVE2) == HWCAP2_SVE2)
|
||||||
|
&& ((getauxval(AT_HWCAP) & HWCAP_SVE) == HWCAP_SVE);
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
static int CPU_readCPUCFG(void)
|
static int CPU_readCPUCFG(void)
|
||||||
{
|
{
|
||||||
uint32_t cfg2 = 0;
|
uint32_t cfg2 = 0;
|
||||||
|
|
@ -960,6 +982,8 @@ static Uint32 SDLCALL SDL_CPUFeatureMaskFromHint(void)
|
||||||
spot_mask = CPU_HAS_LSX;
|
spot_mask = CPU_HAS_LSX;
|
||||||
} else if (ref_string_equals("lasx", spot, end)) {
|
} else if (ref_string_equals("lasx", spot, end)) {
|
||||||
spot_mask = CPU_HAS_LASX;
|
spot_mask = CPU_HAS_LASX;
|
||||||
|
} else if (ref_string_equals("sve2", spot, end)) {
|
||||||
|
spot_mask = CPU_HAS_SVE2;
|
||||||
} else {
|
} else {
|
||||||
// Ignore unknown/incorrect cpu feature(s)
|
// Ignore unknown/incorrect cpu feature(s)
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -1036,6 +1060,10 @@ static Uint32 SDL_GetCPUFeatures(void)
|
||||||
SDL_CPUFeatures |= CPU_HAS_LASX;
|
SDL_CPUFeatures |= CPU_HAS_LASX;
|
||||||
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
|
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
|
||||||
}
|
}
|
||||||
|
if (CPU_haveSVE2()) {
|
||||||
|
SDL_CPUFeatures |= CPU_HAS_SVE2;
|
||||||
|
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
|
||||||
|
}
|
||||||
SDL_CPUFeatures &= SDL_CPUFeatureMaskFromHint();
|
SDL_CPUFeatures &= SDL_CPUFeatureMaskFromHint();
|
||||||
}
|
}
|
||||||
return SDL_CPUFeatures;
|
return SDL_CPUFeatures;
|
||||||
|
|
@ -1117,6 +1145,11 @@ bool SDL_HasLASX(void)
|
||||||
return CPU_FEATURE_AVAILABLE(CPU_HAS_LASX);
|
return CPU_FEATURE_AVAILABLE(CPU_HAS_LASX);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool SDL_HasSVE2(void)
|
||||||
|
{
|
||||||
|
return CPU_FEATURE_AVAILABLE(CPU_HAS_SVE2);
|
||||||
|
}
|
||||||
|
|
||||||
static int SDL_SystemRAM = 0;
|
static int SDL_SystemRAM = 0;
|
||||||
|
|
||||||
int SDL_GetSystemRAM(void)
|
int SDL_GetSystemRAM(void)
|
||||||
|
|
|
||||||
|
|
@ -1287,3 +1287,4 @@ _SDL_GDKResumeRenderer
|
||||||
_SDL_IsPhone
|
_SDL_IsPhone
|
||||||
_SDL_LoadJPG_IO
|
_SDL_LoadJPG_IO
|
||||||
_SDL_LoadJPG
|
_SDL_LoadJPG
|
||||||
|
_SDL_HasSVE2
|
||||||
|
|
|
||||||
|
|
@ -1288,6 +1288,7 @@ SDL3_0.0.0 {
|
||||||
SDL_IsPhone;
|
SDL_IsPhone;
|
||||||
SDL_LoadJPG_IO;
|
SDL_LoadJPG_IO;
|
||||||
SDL_LoadJPG;
|
SDL_LoadJPG;
|
||||||
|
SDL_HasSVE2;
|
||||||
# extra symbols go here (don't modify this line)
|
# extra symbols go here (don't modify this line)
|
||||||
local: *;
|
local: *;
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -1314,3 +1314,4 @@
|
||||||
#define SDL_IsPhone SDL_IsPhone_REAL
|
#define SDL_IsPhone SDL_IsPhone_REAL
|
||||||
#define SDL_LoadJPG_IO SDL_LoadJPG_IO_REAL
|
#define SDL_LoadJPG_IO SDL_LoadJPG_IO_REAL
|
||||||
#define SDL_LoadJPG SDL_LoadJPG_REAL
|
#define SDL_LoadJPG SDL_LoadJPG_REAL
|
||||||
|
#define SDL_HasSVE2 SDL_HasSVE2_REAL
|
||||||
|
|
|
||||||
|
|
@ -1322,3 +1322,4 @@ SDL_DYNAPI_PROC(void,SDL_GDKResumeRenderer,(SDL_Renderer *a),(a),)
|
||||||
SDL_DYNAPI_PROC(bool,SDL_IsPhone,(void),(),return)
|
SDL_DYNAPI_PROC(bool,SDL_IsPhone,(void),(),return)
|
||||||
SDL_DYNAPI_PROC(SDL_Surface*,SDL_LoadJPG_IO,(SDL_IOStream *a,bool b),(a,b),return)
|
SDL_DYNAPI_PROC(SDL_Surface*,SDL_LoadJPG_IO,(SDL_IOStream *a,bool b),(a,b),return)
|
||||||
SDL_DYNAPI_PROC(SDL_Surface*,SDL_LoadJPG,(const char *a),(a),return)
|
SDL_DYNAPI_PROC(SDL_Surface*,SDL_LoadJPG,(const char *a),(a),return)
|
||||||
|
SDL_DYNAPI_PROC(bool,SDL_HasSVE2,(void),(),return)
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,10 @@
|
||||||
#include "SDL_pixels_c.h"
|
#include "SDL_pixels_c.h"
|
||||||
#include "SDL_surface_c.h"
|
#include "SDL_surface_c.h"
|
||||||
|
|
||||||
|
#if defined(SDL_SVE2_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
|
||||||
|
#include "./arm/SDL_sve2_blit_A.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
// Functions to perform alpha blended blitting
|
// Functions to perform alpha blended blitting
|
||||||
|
|
||||||
// N->1 blending with per-surface alpha
|
// N->1 blending with per-surface alpha
|
||||||
|
|
@ -1477,6 +1481,17 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
|
||||||
}
|
}
|
||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
|
#if defined(SDL_SVE2_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
|
||||||
|
if (SDL_HasSVE2()) {
|
||||||
|
if (sf->bytes_per_pixel == 4 &&
|
||||||
|
df->bytes_per_pixel == 2 &&
|
||||||
|
df->Rmask == 0x0000F800 &&
|
||||||
|
df->Gmask == 0x000007E0 &&
|
||||||
|
df->Bmask == 0x0000001F) {
|
||||||
|
return Blit8888to565PixelAlphaSwizzleSVE2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
if (sf->bytes_per_pixel == 4 && sf->Amask == 0xff000000 && sf->Gmask == 0xff00 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
|
if (sf->bytes_per_pixel == 4 && sf->Amask == 0xff000000 && sf->Gmask == 0xff00 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
|
||||||
if (df->Gmask == 0x7e0) {
|
if (df->Gmask == 0x7e0) {
|
||||||
return BlitARGBto565PixelAlpha;
|
return BlitARGBto565PixelAlpha;
|
||||||
|
|
@ -1504,6 +1519,19 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
|
||||||
return Blit8888to8888PixelAlphaSwizzleLSX;
|
return Blit8888to8888PixelAlphaSwizzleLSX;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(SDL_SVE2_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
|
||||||
|
if (SDL_HasSVE2()
|
||||||
|
/* NEON is faster than SVE2 when vector size is 128bit */
|
||||||
|
#if defined(SDL_NEON_INTRINSICS)
|
||||||
|
&& SDL_GetSVEVectorSize() > 128
|
||||||
|
#endif
|
||||||
|
) {
|
||||||
|
// To prevent "unused function" compiler warnings/errors
|
||||||
|
(void)Blit8888to8888PixelAlpha;
|
||||||
|
(void)Blit8888to8888PixelAlphaSwizzle;
|
||||||
|
return Blit8888to8888PixelAlphaSwizzleSVE2;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
|
#if defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
|
||||||
// To prevent "unused function" compiler warnings/errors
|
// To prevent "unused function" compiler warnings/errors
|
||||||
(void)Blit8888to8888PixelAlpha;
|
(void)Blit8888to8888PixelAlpha;
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,10 @@
|
||||||
#include "SDL_surface_c.h"
|
#include "SDL_surface_c.h"
|
||||||
#include "SDL_blit_copy.h"
|
#include "SDL_blit_copy.h"
|
||||||
|
|
||||||
|
#if defined(SDL_SVE2_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
|
||||||
|
#include "./arm/SDL_sve2_blit_N.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
// General optimized routines that write char by char
|
// General optimized routines that write char by char
|
||||||
#define HAVE_FAST_WRITE_INT8 1
|
#define HAVE_FAST_WRITE_INT8 1
|
||||||
|
|
||||||
|
|
@ -3117,10 +3121,27 @@ SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface *surface)
|
||||||
return Blit8888to8888PixelSwizzleSSE41;
|
return Blit8888to8888PixelSwizzleSSE41;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(SDL_SVE2_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
|
||||||
|
if (SDL_HasSVE2()) {
|
||||||
|
return Blit8888to8888PixelSwizzleSVE2;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
|
#if defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
|
||||||
return Blit8888to8888PixelSwizzleNEON;
|
return Blit8888to8888PixelSwizzleNEON;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
#if defined(SDL_SVE2_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
|
||||||
|
if (SDL_HasSVE2()) {
|
||||||
|
/* RGBA8888/ARGB8888/XRGB8888 -> RGB565 */
|
||||||
|
if (srcfmt->bytes_per_pixel == 4 &&
|
||||||
|
dstfmt->bytes_per_pixel == 2 &&
|
||||||
|
dstfmt->Rmask == 0x0000F800 &&
|
||||||
|
dstfmt->Gmask == 0x000007E0 &&
|
||||||
|
dstfmt->Bmask == 0x0000001F) {
|
||||||
|
return Blit8888to565PixelSwizzleSVE2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
blitfun = NULL;
|
blitfun = NULL;
|
||||||
if (dstfmt->bits_per_pixel > 8) {
|
if (dstfmt->bits_per_pixel > 8) {
|
||||||
|
|
|
||||||
89
src/video/arm/SDL_sve2_blit_A.c
Normal file
89
src/video/arm/SDL_sve2_blit_A.c
Normal file
|
|
@ -0,0 +1,89 @@
|
||||||
|
/*
|
||||||
|
Simple DirectMedia Layer
|
||||||
|
Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied
|
||||||
|
warranty. In no event will the authors be held liable for any damages
|
||||||
|
arising from the use of this software.
|
||||||
|
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it
|
||||||
|
freely, subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not
|
||||||
|
claim that you wrote the original software. If you use this software
|
||||||
|
in a product, an acknowledgment in the product documentation would be
|
||||||
|
appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "SDL_sve2_blit_A.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#ifdef SDL_SVE2_INTRINSICS
|
||||||
|
|
||||||
|
#undef sdl_sve_rgb32_blend_op_fill_alpha
|
||||||
|
#define sdl_sve_rgb32_blend_op_fill_alpha(ma_alpha_chn_idx) \
|
||||||
|
if (sve_src_chn_idx == (ma_alpha_chn_idx)) { \
|
||||||
|
/* fill alpha */ \
|
||||||
|
sve_target_u16 = svdup_u16(0xFF); \
|
||||||
|
} else { \
|
||||||
|
svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx)); \
|
||||||
|
sve_target_u16 = sdl_sve_chn_blend_with_mask(sve_source_u16, \
|
||||||
|
sve_target_u16, \
|
||||||
|
vMask); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef sdl_sve_rgb32_blend_op_copy_alpha
|
||||||
|
#define sdl_sve_rgb32_blend_op_copy_alpha(ma_alpha_chn_idx) \
|
||||||
|
if (sve_src_chn_idx == (ma_alpha_chn_idx)) { \
|
||||||
|
svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx)); \
|
||||||
|
sve_target_u16 = sdl_sve_chn_blend_with_mask(svdup_u16(0xFF), \
|
||||||
|
sve_target_u16, \
|
||||||
|
vMask); \
|
||||||
|
} else { \
|
||||||
|
svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx)); \
|
||||||
|
sve_target_u16 = sdl_sve_chn_blend_with_mask(sve_source_u16, \
|
||||||
|
sve_target_u16, \
|
||||||
|
vMask); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef sdl_sve_rgb32_blend_to_rgb565_op
|
||||||
|
#define sdl_sve_rgb32_blend_to_rgb565_op(ma_alpha_chn_idx) \
|
||||||
|
do { \
|
||||||
|
svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx)); \
|
||||||
|
sve_target_u16 = sdl_sve_chn_blend_with_mask(sve_source_u16, \
|
||||||
|
sve_target_u16, \
|
||||||
|
vMask); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#include "SDL_sve2_swizzle.h"
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------------------*
|
||||||
|
* Swizzle Blend with Alpha *
|
||||||
|
*-----------------------------------------------------------------------------*/
|
||||||
|
SDL_TARGETING("arch=armv8-a+sve2")
|
||||||
|
void Blit8888to8888PixelAlphaSwizzleSVE2(SDL_BlitInfo *info)
|
||||||
|
{
|
||||||
|
const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
|
||||||
|
assert(0 != srcfmt->Amask);
|
||||||
|
(void)srcfmt;
|
||||||
|
|
||||||
|
sdl_sve_8888_to_8888_swizzle_dispatcher(info);
|
||||||
|
}
|
||||||
|
|
||||||
|
SDL_TARGETING("arch=armv8-a+sve2")
|
||||||
|
void Blit8888to565PixelAlphaSwizzleSVE2(SDL_BlitInfo *info)
|
||||||
|
{
|
||||||
|
sdl_sve_rgb32_to_rgb565_swizzle_dispatcher(info);
|
||||||
|
}
|
||||||
|
|
||||||
|
SDL_TARGETING("arch=armv8-a+sve2")
|
||||||
|
size_t SDL_GetSVEVectorSize(void)
|
||||||
|
{
|
||||||
|
return svlen(svundef_u8()) * 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* SDL_SVE2_INTRINSICS */
|
||||||
37
src/video/arm/SDL_sve2_blit_A.h
Normal file
37
src/video/arm/SDL_sve2_blit_A.h
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
/*
|
||||||
|
Simple DirectMedia Layer
|
||||||
|
Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied
|
||||||
|
warranty. In no event will the authors be held liable for any damages
|
||||||
|
arising from the use of this software.
|
||||||
|
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it
|
||||||
|
freely, subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not
|
||||||
|
claim that you wrote the original software. If you use this software
|
||||||
|
in a product, an acknowledgment in the product documentation would be
|
||||||
|
appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef SDL_sve2_blit_A_h_
|
||||||
|
#define SDL_sve2_blit_A_h_
|
||||||
|
|
||||||
|
#include "../../SDL_internal.h"
|
||||||
|
#include "../SDL_blit.h"
|
||||||
|
|
||||||
|
#ifdef SDL_SVE2_INTRINSICS
|
||||||
|
|
||||||
|
void Blit8888to8888PixelAlphaSwizzleSVE2(SDL_BlitInfo *info);
|
||||||
|
void Blit8888to565PixelAlphaSwizzleSVE2(SDL_BlitInfo *info);
|
||||||
|
|
||||||
|
size_t SDL_GetSVEVectorSize(void);
|
||||||
|
|
||||||
|
#endif /* SDL_SVE2_INTRINSICS */
|
||||||
|
|
||||||
|
#endif /* SDL_sve2_blitters_h_ */
|
||||||
64
src/video/arm/SDL_sve2_blit_N.c
Normal file
64
src/video/arm/SDL_sve2_blit_N.c
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
/*
|
||||||
|
Simple DirectMedia Layer
|
||||||
|
Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied
|
||||||
|
warranty. In no event will the authors be held liable for any damages
|
||||||
|
arising from the use of this software.
|
||||||
|
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it
|
||||||
|
freely, subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not
|
||||||
|
claim that you wrote the original software. If you use this software
|
||||||
|
in a product, an acknowledgment in the product documentation would be
|
||||||
|
appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "SDL_sve2_blit_N.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#ifdef SDL_SVE2_INTRINSICS
|
||||||
|
|
||||||
|
#undef sdl_sve_rgb32_blend_op_fill_alpha
|
||||||
|
#define sdl_sve_rgb32_blend_op_fill_alpha(ma_alpha_chn_idx) \
|
||||||
|
do { \
|
||||||
|
if (sve_src_chn_idx == (ma_alpha_chn_idx)) { \
|
||||||
|
/* fill alpha */ \
|
||||||
|
sve_target_u16 = svdup_u16(0xFF); \
|
||||||
|
} else { \
|
||||||
|
sve_target_u16 = sve_source_u16; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#undef sdl_sve_rgb32_blend_op_copy_alpha
|
||||||
|
#define sdl_sve_rgb32_blend_op_copy_alpha(ma_alpha_chn_idx) \
|
||||||
|
do { \
|
||||||
|
sve_target_u16 = sve_source_u16; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#undef sdl_sve_rgb32_blend_to_rgb565_op
|
||||||
|
#define sdl_sve_rgb32_blend_to_rgb565_op(ma_alpha_chn_idx) \
|
||||||
|
do { \
|
||||||
|
sve_target_u16 = sve_source_u16; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#include "SDL_sve2_swizzle.h"
|
||||||
|
|
||||||
|
SDL_TARGETING("arch=armv8-a+sve2")
|
||||||
|
void Blit8888to8888PixelSwizzleSVE2(SDL_BlitInfo *info)
|
||||||
|
{
|
||||||
|
sdl_sve_8888_to_8888_swizzle_dispatcher(info);
|
||||||
|
}
|
||||||
|
|
||||||
|
SDL_TARGETING("arch=armv8-a+sve2")
|
||||||
|
void Blit8888to565PixelSwizzleSVE2(SDL_BlitInfo *info)
|
||||||
|
{
|
||||||
|
sdl_sve_rgb32_to_rgb565_swizzle_dispatcher(info);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* SDL_SVE2_INTRINSICS */
|
||||||
35
src/video/arm/SDL_sve2_blit_N.h
Normal file
35
src/video/arm/SDL_sve2_blit_N.h
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
/*
|
||||||
|
Simple DirectMedia Layer
|
||||||
|
Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied
|
||||||
|
warranty. In no event will the authors be held liable for any damages
|
||||||
|
arising from the use of this software.
|
||||||
|
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it
|
||||||
|
freely, subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not
|
||||||
|
claim that you wrote the original software. If you use this software
|
||||||
|
in a product, an acknowledgment in the product documentation would be
|
||||||
|
appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef SDL_sve2_blit_N_h_
|
||||||
|
#define SDL_sve2_blit_N_h_
|
||||||
|
|
||||||
|
#include "../../SDL_internal.h"
|
||||||
|
#include "../SDL_blit.h"
|
||||||
|
|
||||||
|
#ifdef SDL_SVE2_INTRINSICS
|
||||||
|
|
||||||
|
void Blit8888to8888PixelSwizzleSVE2(SDL_BlitInfo *info);
|
||||||
|
void Blit8888to565PixelSwizzleSVE2(SDL_BlitInfo *info);
|
||||||
|
|
||||||
|
#endif /* SDL_SVE2_INTRINSICS */
|
||||||
|
|
||||||
|
#endif /* SDL_sve2_blitters_h_ */
|
||||||
1142
src/video/arm/SDL_sve2_extension.h
Normal file
1142
src/video/arm/SDL_sve2_extension.h
Normal file
File diff suppressed because it is too large
Load diff
2375
src/video/arm/SDL_sve2_swizzle.h
Normal file
2375
src/video/arm/SDL_sve2_swizzle.h
Normal file
File diff suppressed because it is too large
Load diff
206
src/video/arm/SDL_sve2_util.h
Normal file
206
src/video/arm/SDL_sve2_util.h
Normal file
|
|
@ -0,0 +1,206 @@
|
||||||
|
/*
|
||||||
|
Simple DirectMedia Layer
|
||||||
|
Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied
|
||||||
|
warranty. In no event will the authors be held liable for any damages
|
||||||
|
arising from the use of this software.
|
||||||
|
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it
|
||||||
|
freely, subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not
|
||||||
|
claim that you wrote the original software. If you use this software
|
||||||
|
in a product, an acknowledgment in the product documentation would be
|
||||||
|
appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef SDL_SVE2_UTIL_H
|
||||||
|
#define SDL_SVE2_UTIL_H
|
||||||
|
|
||||||
|
#undef SVE_0_CONNECT2
|
||||||
|
#undef SVE_0_CONNECT3
|
||||||
|
#undef SVE_0_CONNECT4
|
||||||
|
#undef SVE_0_CONNECT5
|
||||||
|
#undef SVE_0_CONNECT6
|
||||||
|
#undef SVE_0_CONNECT7
|
||||||
|
#undef SVE_0_CONNECT8
|
||||||
|
#undef SVE_0_CONNECT9
|
||||||
|
|
||||||
|
#undef SVE_CONNECT2
|
||||||
|
#undef SVE_CONNECT3
|
||||||
|
#undef SVE_CONNECT4
|
||||||
|
#undef SVE_CONNECT5
|
||||||
|
#undef SVE_CONNECT6
|
||||||
|
#undef SVE_CONNECT7
|
||||||
|
#undef SVE_CONNECT8
|
||||||
|
#undef SVE_CONNECT9
|
||||||
|
#undef ALT_SVE_CONNECT2
|
||||||
|
|
||||||
|
#undef SVE_SAFE_NAME
|
||||||
|
|
||||||
|
#undef SVE_CONNECT
|
||||||
|
|
||||||
|
#define SVE_0_CONNECT2(ma_A, ma_B) ma_A##ma_B
|
||||||
|
#define SVE_0_CONNECT3(ma_A, ma_B, ma_C) ma_A##ma_B##ma_C
|
||||||
|
#define SVE_0_CONNECT4(ma_A, ma_B, ma_C, ma_D) ma_A##ma_B##ma_C##ma_D
|
||||||
|
#define SVE_0_CONNECT5(ma_A, ma_B, ma_C, ma_D, ma_E) \
|
||||||
|
ma_A##ma_B##ma_C##ma_D##ma_E
|
||||||
|
#define SVE_0_CONNECT6(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F) \
|
||||||
|
ma_A##ma_B##ma_C##ma_D##ma_E##ma_F
|
||||||
|
#define SVE_0_CONNECT7(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G) \
|
||||||
|
ma_A##ma_B##ma_C##ma_D##ma_E##ma_F##ma_G
|
||||||
|
#define SVE_0_CONNECT8(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G, ma_H) \
|
||||||
|
ma_A##ma_B##ma_C##ma_D##ma_E##ma_F##ma_G##ma_H
|
||||||
|
#define SVE_0_CONNECT9(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G, ma_H, ma_I) \
|
||||||
|
ma_A##ma_B##ma_C##ma_D##ma_E##ma_F##ma_G##ma_H##ma_I
|
||||||
|
|
||||||
|
#define ALT_SVE_CONNECT2(ma_A, ma_B) SVE_0_CONNECT2(ma_A, ma_B)
|
||||||
|
#define SVE_CONNECT2(ma_A, ma_B) SVE_0_CONNECT2(ma_A, ma_B)
|
||||||
|
#define SVE_CONNECT3(ma_A, ma_B, ma_C) SVE_0_CONNECT3(ma_A, ma_B, ma_C)
|
||||||
|
#define SVE_CONNECT4(ma_A, ma_B, ma_C, ma_D) \
|
||||||
|
SVE_0_CONNECT4(ma_A, ma_B, ma_C, ma_D)
|
||||||
|
#define SVE_CONNECT5(ma_A, ma_B, ma_C, ma_D, ma_E) \
|
||||||
|
SVE_0_CONNECT5(ma_A, ma_B, ma_C, ma_D, ma_E)
|
||||||
|
#define SVE_CONNECT6(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F) \
|
||||||
|
SVE_0_CONNECT6(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F)
|
||||||
|
#define SVE_CONNECT7(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G) \
|
||||||
|
SVE_0_CONNECT7(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G)
|
||||||
|
#define SVE_CONNECT8(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G, ma_H) \
|
||||||
|
SVE_0_CONNECT8(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G, ma_H)
|
||||||
|
#define SVE_CONNECT9(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G, ma_H, ma_I) \
|
||||||
|
SVE_0_CONNECT9(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G, ma_H, ma_I)
|
||||||
|
|
||||||
|
#define SVE_CONNECT(...) \
|
||||||
|
ALT_SVE_CONNECT2(SVE_CONNECT, \
|
||||||
|
SVE_VA_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
|
||||||
|
|
||||||
|
#ifndef SVE_VA_NUM_ARGS_IMPL
|
||||||
|
#define SVE_VA_NUM_ARGS_IMPL(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \
|
||||||
|
_12, _13, _14, _15, _16, ma_N, ...) ma_N
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef SVE_VA_NUM_ARGS
|
||||||
|
#define SVE_VA_NUM_ARGS(...) \
|
||||||
|
SVE_VA_NUM_ARGS_IMPL(0, ##__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, \
|
||||||
|
8, 7, 6, 5, 4, 3, 2, 1, 0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define SVE_SAFE_NAME(ma_NAME) SVE_CONNECT3(ma_, ma_NAME, ma_LINEma_)
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------------*
|
||||||
|
* SVE Test Helper *
|
||||||
|
* ---------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
#define SVT_PRINT_VECTOR(ma_VECOTOR, ma_ELEMENT_T, ma_FORMAT_STRING) \
|
||||||
|
do { \
|
||||||
|
int_fast8_t nElementCount = svcntb_pat(SV_ALL) / sizeof(ma_ELEMENT_T); \
|
||||||
|
uint8_t SVE_SAFE_NAME(chVectorBuffer) \
|
||||||
|
[nElementCount * sizeof(ma_ELEMENT_T)]; \
|
||||||
|
\
|
||||||
|
svst1_u8(svptrue_b8(), \
|
||||||
|
SVE_SAFE_NAME(chVectorBuffer), \
|
||||||
|
svreinterpret_u8(ma_VECOTOR)); \
|
||||||
|
\
|
||||||
|
ma_ELEMENT_T *pElement = (ma_ELEMENT_T *)SVE_SAFE_NAME(chVectorBuffer); \
|
||||||
|
printf("%s\t[", #ma_VECOTOR); \
|
||||||
|
do { \
|
||||||
|
printf(ma_FORMAT_STRING "\t", (int)*pElement++); \
|
||||||
|
} while (--nElementCount); \
|
||||||
|
printf("]\r\n"); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define SVT_INIT_VECOTR(ma_VECTOR, ma_ELEMENT_T, ...) \
|
||||||
|
do { \
|
||||||
|
uint8_t SVE_SAFE_NAME(chVectorBuffer)[svcntb_pat(SV_ALL)]; \
|
||||||
|
\
|
||||||
|
memset(SVE_SAFE_NAME(chVectorBuffer), /* This should NOT be SDL_memset() */ \
|
||||||
|
0, \
|
||||||
|
sizeof(SVE_SAFE_NAME(chVectorBuffer))); \
|
||||||
|
memcpy(SVE_SAFE_NAME(chVectorBuffer), /* This should NOT be SDL_memcpy() */ \
|
||||||
|
(ma_ELEMENT_T[]){ __VA_ARGS__ }, \
|
||||||
|
MIN(sizeof(SVE_SAFE_NAME(chVectorBuffer)), \
|
||||||
|
sizeof((ma_ELEMENT_T[]){ __VA_ARGS__ }))); \
|
||||||
|
\
|
||||||
|
ma_VECTOR = svld1(svptrue_b8(), \
|
||||||
|
(ma_ELEMENT_T *)SVE_SAFE_NAME(chVectorBuffer)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define SVT_INIT_PRED(ma_PREDICT, ...) \
|
||||||
|
do { \
|
||||||
|
uint8_t SVE_SAFE_NAME(chBuffer)[svlen(svundef_u64())]; \
|
||||||
|
memset(SVE_SAFE_NAME(chBuffer), /* This should NOT be SDL_memset() */ \
|
||||||
|
0, \
|
||||||
|
sizeof(SVE_SAFE_NAME(chBuffer))); \
|
||||||
|
\
|
||||||
|
memcpy(SVE_SAFE_NAME(chBuffer), /* This should NOT be SDL_memcpy() */ \
|
||||||
|
(uint8_t[]){ __VA_ARGS__ }, \
|
||||||
|
MIN(sizeof(SVE_SAFE_NAME(chBuffer)), \
|
||||||
|
sizeof((uint8_t[]){ __VA_ARGS__ }))); \
|
||||||
|
\
|
||||||
|
ma_PREDICT = (*(svbool_t *)SVE_SAFE_NAME(chBuffer)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define SVT_PRINT_PRED(ma_PREDICT, ma_TYPE_T) \
|
||||||
|
do { \
|
||||||
|
printf("%8s\t[", #ma_PREDICT); \
|
||||||
|
uint16_t SVE_SAFE_NAME(hwBuffer)[svlen(svundef_u64()) / 2]; \
|
||||||
|
memset(SVE_SAFE_NAME(hwBuffer), /* This should NOT be SDL_memset() */ \
|
||||||
|
0, \
|
||||||
|
sizeof(SVE_SAFE_NAME(hwBuffer))); \
|
||||||
|
*(volatile svbool_t *)SVE_SAFE_NAME(hwBuffer) = (ma_PREDICT); \
|
||||||
|
\
|
||||||
|
uint_fast16_t SVE_SAFE_NAME(nTotalBits) = svlen(svundef_u8()); \
|
||||||
|
uint_fast8_t SVE_SAFE_NAME(nElementBits) = sizeof(ma_TYPE_T); \
|
||||||
|
\
|
||||||
|
uint16_t *phwPred = SVE_SAFE_NAME(hwBuffer); \
|
||||||
|
do { \
|
||||||
|
uint16_t hwPred = *phwPred++; \
|
||||||
|
\
|
||||||
|
for (uint_fast8_t n = 0; \
|
||||||
|
n < 16; \
|
||||||
|
n += SVE_SAFE_NAME(nElementBits)) { \
|
||||||
|
\
|
||||||
|
if (hwPred & 0x01) { \
|
||||||
|
printf("True "); \
|
||||||
|
} else { \
|
||||||
|
printf("False"); \
|
||||||
|
} \
|
||||||
|
printf("%*s\t", (int)sizeof(ma_TYPE_T) - 1, ""); \
|
||||||
|
hwPred >>= SVE_SAFE_NAME(nElementBits); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
SVE_SAFE_NAME(nTotalBits) -= 16; \
|
||||||
|
} while (SVE_SAFE_NAME(nTotalBits)); \
|
||||||
|
\
|
||||||
|
printf("]\r\n"); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define SVT_PRINT_BUFFER(ma_BUFF_PTR, ma_SIZE, ma_TYPE_T, ma_FMT_STR, ma_STRIDE) \
|
||||||
|
do { \
|
||||||
|
ma_TYPE_T *pBuffer = (ma_TYPE_T *)ma_BUFF_PTR; \
|
||||||
|
size_t nElementCount = (ma_SIZE) / sizeof(ma_TYPE_T); \
|
||||||
|
\
|
||||||
|
size_t nStrideSize = (ma_STRIDE); \
|
||||||
|
size_t nLineCount = 0; \
|
||||||
|
\
|
||||||
|
printf("%s\n\t", #ma_BUFF_PTR); \
|
||||||
|
do { \
|
||||||
|
\
|
||||||
|
printf(ma_FMT_STR " ", *pBuffer++); \
|
||||||
|
nLineCount++; \
|
||||||
|
if (nLineCount >= nStrideSize) { \
|
||||||
|
nLineCount = 0; \
|
||||||
|
printf("\n\t"); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
} while (--nElementCount); \
|
||||||
|
printf("\n"); \
|
||||||
|
\
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#endif /* SDL_SVE2_UTIL_H */
|
||||||
|
|
@ -414,6 +414,7 @@ static int TestCPUInfo(bool verbose)
|
||||||
SDL_Log("NEON %s", SDL_HasNEON() ? "detected" : "not detected");
|
SDL_Log("NEON %s", SDL_HasNEON() ? "detected" : "not detected");
|
||||||
SDL_Log("LSX %s", SDL_HasLSX() ? "detected" : "not detected");
|
SDL_Log("LSX %s", SDL_HasLSX() ? "detected" : "not detected");
|
||||||
SDL_Log("LASX %s", SDL_HasLASX() ? "detected" : "not detected");
|
SDL_Log("LASX %s", SDL_HasLASX() ? "detected" : "not detected");
|
||||||
|
SDL_Log("SVE2 %s", SDL_HasSVE2() ? "detected" : "not detected");
|
||||||
SDL_Log("System RAM %d MB", SDL_GetSystemRAM());
|
SDL_Log("System RAM %d MB", SDL_GetSystemRAM());
|
||||||
SDL_Log("System memory page size %d bytes", SDL_GetSystemPageSize());
|
SDL_Log("System memory page size %d bytes", SDL_GetSystemPageSize());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue