diff --git a/Android.mk b/Android.mk
index 2e3b11483c..d53bf403b1 100644
--- a/Android.mk
+++ b/Android.mk
@@ -84,6 +84,7 @@ LOCAL_SRC_FILES := \
 	$(wildcard $(LOCAL_PATH)/src/tray/*.c) \
 	$(wildcard $(LOCAL_PATH)/src/video/*.c) \
 	$(wildcard $(LOCAL_PATH)/src/video/android/*.c) \
+	$(wildcard $(LOCAL_PATH)/src/video/arm/*.c) \
 	$(wildcard $(LOCAL_PATH)/src/video/yuv2rgb/*.c))
 
 LOCAL_CFLAGS += -DGL_GLEXT_PROTOTYPES
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e591a011c1..851e11add9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -314,6 +314,7 @@ dep_option(SDL_SSE4_2              "Use SSE4.2 assembly routines" ON "SDL_ASSEMB
 dep_option(SDL_MMX                 "Use MMX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
 dep_option(SDL_ALTIVEC             "Use Altivec assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_POWERPC32 OR SDL_CPU_POWERPC64" OFF)
 dep_option(SDL_ARMNEON             "Use NEON assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_ARM32 OR SDL_CPU_ARM64" OFF)
+dep_option(SDL_ARMSVE2             "Use SVE2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_ARM64" OFF)
 dep_option(SDL_LSX                 "Use LSX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_LOONGARCH64" OFF)
 dep_option(SDL_LASX                "Use LASX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_LOONGARCH64" OFF)
 
@@ -939,6 +940,37 @@ if(SDL_ASSEMBLY)
     endif()
   endif()
 
+  if(SDL_ARMSVE2)
+    cmake_push_check_state()
+      string(APPEND CMAKE_REQUIRED_FLAGS " -march=armv8-a+sve2")
+      check_arm_source_compiles([==[
+        #include <arm_sve.h>
+        svuint32_t sve2_test(svuint32_t a, svuint32_t b) {
+          return svadd_u32_x(svptrue_b32(), a, b);
+        }
+        int main(int argc, char *argv[]) {
+          sve2_test(svdup_u32(0), svdup_u32(0));
+          return 0;
+        }]==] COMPILER_SUPPORTS_ARMSVE2)
+      if(COMPILER_SUPPORTS_ARMSVE2)
+        set(HAVE_ARMSVE2 TRUE)
+      endif()
+    cmake_pop_check_state()
+
+    if(HAVE_ARMSVE2)
+      sdl_sources(
+        "${SDL3_SOURCE_DIR}/src/video/arm/SDL_sve2_blit_A.c"
+        "${SDL3_SOURCE_DIR}/src/video/arm/SDL_sve2_blit_N.c"
+      )
+      set_source_files_properties(
+        "${SDL3_SOURCE_DIR}/src/video/arm/SDL_sve2_blit_A.c"
+        "${SDL3_SOURCE_DIR}/src/video/arm/SDL_sve2_blit_N.c"
+        PROPERTIES
+          SKIP_PRECOMPILE_HEADERS ON
+      )
+    endif()
+  endif()
+
   if(USE_GCC OR USE_CLANG)
     # TODO: Those all seem to be quite GCC specific - needs to be
     # reworked for better compiler support
@@ -1055,6 +1087,10 @@ if(NOT HAVE_ARMNEON)
   set(SDL_DISABLE_NEON 1)
 endif()
 
+if(NOT HAVE_ARMSVE2)
+  set(SDL_DISABLE_SVE2 1)
+endif()
+
 set(SDL_DISABLE_ALLOCA 0)
 check_include_file("alloca.h" "HAVE_ALLOCA_H")
 if(MSVC)
diff --git a/include/SDL3/SDL_cpuinfo.h b/include/SDL3/SDL_cpuinfo.h
index 5669c2373d..765cadf287 100644
--- a/include/SDL3/SDL_cpuinfo.h
+++ b/include/SDL3/SDL_cpuinfo.h
@@ -281,6 +281,18 @@ extern SDL_DECLSPEC bool SDLCALL SDL_HasARMSIMD(void);
  */
 extern SDL_DECLSPEC bool SDLCALL SDL_HasNEON(void);
 
+/**
+ * Determine whether the CPU has SVE2 (Scalable Vector Extension 2).
+ *
+ * This is only relevant on ARM64 Linux and Android. On other platforms it
+ * always returns false.
+ *
+ * \returns true if the CPU has SVE2, false otherwise.
+ *
+ * \since This function is available since SDL 3.6.0.
+ */
+extern SDL_DECLSPEC bool SDLCALL SDL_HasSVE2(void);
+
 /**
  * Determine whether the CPU has LSX (LOONGARCH SIMD) features.
  *
diff --git a/include/SDL3/SDL_intrin.h b/include/SDL3/SDL_intrin.h
index a2e968080c..ecd8192941 100644
--- a/include/SDL3/SDL_intrin.h
+++ b/include/SDL3/SDL_intrin.h
@@ -85,6 +85,16 @@
  */
 #define SDL_NEON_INTRINSICS 1
 
+/**
+ * Defined if (and only if) the compiler supports ARM SVE2 intrinsics.
+ *
+ * If this macro is defined, SDL will have already included `<arm_sve.h>`
+ * as appropriate.
+ *
+ * \since This macro is available since SDL 3.6.0.
+ */
+#define SDL_SVE2_INTRINSICS 1
+
 /**
  * Defined if (and only if) the compiler supports PowerPC Altivec intrinsics.
  *
@@ -237,6 +247,10 @@ _m_prefetch(void *__P)
 #  define SDL_NEON_INTRINSICS 1
 #  include <arm_neon.h>
 #endif
+#if defined(__ARM_FEATURE_SVE2) && !defined(SDL_DISABLE_SVE2)
+#  define SDL_SVE2_INTRINSICS 1
+#  include <arm_sve.h>
+#endif
 
 #else
 /* altivec.h redefining bool causes a number of problems, see bugs 3993 and 4392, so you need to explicitly define SDL_ENABLE_ALTIVEC to have it included. */
@@ -265,6 +279,20 @@ _m_prefetch(void *__P)
 #    endif
 #  endif
 #endif
+#ifndef SDL_DISABLE_SVE2
+#  if defined(SDL_PLATFORM_WINDOWS)
+/* Visual Studio doesn't define __ARM_ARCH, but _M_ARM (if set, always 7), and _M_ARM64 (if set, always 1). */
+#    if defined (_M_ARM64) && 0 /* Only remove this 0 once MSVC officially releases support for SVE2. */
+#      define SDL_SVE2_INTRINSICS 1
+#      include <arm_sve.h>
+#      define __ARM_FEATURE_SVE2 1 /* Set __ARM_FEATURE_SVE2 so that it can be used elsewhere, at compile time */
+#      define __ARM_ARCH 8
+#    endif
+#  elif defined(__aarch64__) && !defined(SDL_PLATFORM_MACOS)  /* <arm_sve.h> is AArch64-only; Apple has no AArch64 device supporting SVE2 */
+#    define SDL_SVE2_INTRINSICS 1
+#    include <arm_sve.h>
+#  endif
+#endif
 #endif /* compiler version */
 
 #ifdef SDL_WIKI_DOCUMENTATION_SECTION
diff --git a/include/build_config/SDL_build_config.h.cmake b/include/build_config/SDL_build_config.h.cmake
index e7d0b34f42..2e0cdc21b4 100644
--- a/include/build_config/SDL_build_config.h.cmake
+++ b/include/build_config/SDL_build_config.h.cmake
@@ -625,6 +625,7 @@ typedef unsigned int uintptr_t;
 #cmakedefine SDL_DISABLE_LSX 1
 #cmakedefine SDL_DISABLE_LASX 1
 #cmakedefine SDL_DISABLE_NEON 1
+#cmakedefine SDL_DISABLE_SVE2 1
 
 #ifdef SDL_PLATFORM_PRIVATE
 #include "SDL_end_config_private.h"
diff --git a/include/build_config/SDL_build_config_ios.h b/include/build_config/SDL_build_config_ios.h
index 308270b5a0..56f17f8b8f 100644
--- a/include/build_config/SDL_build_config_ios.h
+++ b/include/build_config/SDL_build_config_ios.h
@@ -226,4 +226,7 @@
 /* Enable tray subsystem */
 #define SDL_TRAY_DUMMY 1
 
+/* Disable ARM SVE2 intrinsics until we confirm they're available on all Apple mobile and TV hardware */
+#define SDL_DISABLE_SVE2 1
+
 #endif /* SDL_build_config_ios_h_ */
diff --git a/src/cpuinfo/SDL_cpuinfo.c b/src/cpuinfo/SDL_cpuinfo.c
index 966a5ae79a..19daae4421 100644
--- a/src/cpuinfo/SDL_cpuinfo.c
+++ b/src/cpuinfo/SDL_cpuinfo.c
@@ -109,6 +109,7 @@
 #define CPU_HAS_ARM_SIMD (1 << 11)
 #define CPU_HAS_LSX      (1 << 12)
 #define CPU_HAS_LASX     (1 << 13)
+#define CPU_HAS_SVE2     (1 << 14)
 
 #define CPU_CFG2      0x2
 #define CPU_CFG2_LSX  (1 << 6)
@@ -514,6 +515,27 @@ static int CPU_haveNEON(void)
 #endif
 }
 
+#ifndef AT_HWCAP2
+#define AT_HWCAP2 26
+#endif
+#ifndef HWCAP_SVE
+#define HWCAP_SVE (1 << 22)
+#endif
+#ifndef HWCAP2_SVE2
+#define HWCAP2_SVE2 (1 << 1)
+#endif
+
+static int CPU_haveSVE2(void)
+{
+#if defined(__aarch64__) && \
+    (defined(SDL_PLATFORM_ANDROID) || (defined(SDL_PLATFORM_LINUX) && defined(HAVE_GETAUXVAL)))
+    // Require both the SVE2 bit in AT_HWCAP2 and the base SVE bit in AT_HWCAP.
+    return ((getauxval(AT_HWCAP2) & HWCAP2_SVE2) != 0) && ((getauxval(AT_HWCAP) & HWCAP_SVE) != 0);
+#else
+    return 0;
+#endif
+}
+
 static int CPU_readCPUCFG(void)
 {
     uint32_t cfg2 = 0;
@@ -960,6 +982,8 @@ static Uint32 SDLCALL SDL_CPUFeatureMaskFromHint(void)
                 spot_mask = CPU_HAS_LSX;
             } else if (ref_string_equals("lasx", spot, end)) {
                 spot_mask = CPU_HAS_LASX;
+            } else if (ref_string_equals("sve2", spot, end)) {
+                spot_mask = CPU_HAS_SVE2;
             } else {
                 // Ignore unknown/incorrect cpu feature(s)
                 continue;
@@ -1036,6 +1060,10 @@ static Uint32 SDL_GetCPUFeatures(void)
             SDL_CPUFeatures |= CPU_HAS_LASX;
             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
         }
+        if (CPU_haveSVE2()) {
+            SDL_CPUFeatures |= CPU_HAS_SVE2;
+            SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
+        }
         SDL_CPUFeatures &= SDL_CPUFeatureMaskFromHint();
     }
     return SDL_CPUFeatures;
@@ -1117,6 +1145,11 @@ bool SDL_HasLASX(void)
     return CPU_FEATURE_AVAILABLE(CPU_HAS_LASX);
 }
 
+bool SDL_HasSVE2(void)
+{
+    return CPU_FEATURE_AVAILABLE(CPU_HAS_SVE2);
+}
+
 static int SDL_SystemRAM = 0;
 
 int SDL_GetSystemRAM(void)
diff --git a/src/dynapi/SDL_dynapi.exports b/src/dynapi/SDL_dynapi.exports
index 67600f2b7b..32e9fbff86 100644
--- a/src/dynapi/SDL_dynapi.exports
+++ b/src/dynapi/SDL_dynapi.exports
@@ -1287,3 +1287,4 @@ _SDL_GDKResumeRenderer
 _SDL_IsPhone
 _SDL_LoadJPG_IO
 _SDL_LoadJPG
+_SDL_HasSVE2
diff --git a/src/dynapi/SDL_dynapi.sym b/src/dynapi/SDL_dynapi.sym
index 3fdc470a33..ca1a1c97d9 100644
--- a/src/dynapi/SDL_dynapi.sym
+++ b/src/dynapi/SDL_dynapi.sym
@@ -1288,6 +1288,7 @@ SDL3_0.0.0 {
     SDL_IsPhone;
     SDL_LoadJPG_IO;
     SDL_LoadJPG;
+    SDL_HasSVE2;
     # extra symbols go here (don't modify this line)
   local: *;
 };
diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h
index 7b88affdc6..677768ff2f 100644
--- a/src/dynapi/SDL_dynapi_overrides.h
+++ b/src/dynapi/SDL_dynapi_overrides.h
@@ -1314,3 +1314,4 @@
 #define SDL_IsPhone SDL_IsPhone_REAL
 #define SDL_LoadJPG_IO SDL_LoadJPG_IO_REAL
 #define SDL_LoadJPG SDL_LoadJPG_REAL
+#define SDL_HasSVE2 SDL_HasSVE2_REAL
diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h
index 24a5afad98..99899b346e 100644
--- a/src/dynapi/SDL_dynapi_procs.h
+++ b/src/dynapi/SDL_dynapi_procs.h
@@ -1322,3 +1322,4 @@ SDL_DYNAPI_PROC(void,SDL_GDKResumeRenderer,(SDL_Renderer *a),(a),)
 SDL_DYNAPI_PROC(bool,SDL_IsPhone,(void),(),return)
 SDL_DYNAPI_PROC(SDL_Surface*,SDL_LoadJPG_IO,(SDL_IOStream *a,bool b),(a,b),return)
 SDL_DYNAPI_PROC(SDL_Surface*,SDL_LoadJPG,(const char *a),(a),return)
+SDL_DYNAPI_PROC(bool,SDL_HasSVE2,(void),(),return)
diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c
index f7a997f3b0..0dcd25d885 100644
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -25,6 +25,10 @@
 #include "SDL_pixels_c.h"
 #include "SDL_surface_c.h"
 
+#if defined(SDL_SVE2_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
+#include "./arm/SDL_sve2_blit_A.h"
+#endif
+
 // Functions to perform alpha blended blitting
 
 // N->1 blending with per-surface alpha
@@ -1477,6 +1481,17 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
             }
 
         case 2:
+#if defined(SDL_SVE2_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
+            if (SDL_HasSVE2()) {
+                if (sf->bytes_per_pixel == 4 &&
+                    df->bytes_per_pixel == 2 &&
+                    df->Rmask == 0x0000F800 &&
+                    df->Gmask == 0x000007E0 &&
+                    df->Bmask == 0x0000001F) {
+                    return Blit8888to565PixelAlphaSwizzleSVE2;
+                }
+            }
+#endif
             if (sf->bytes_per_pixel == 4 && sf->Amask == 0xff000000 && sf->Gmask == 0xff00 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
                 if (df->Gmask == 0x7e0) {
                     return BlitARGBto565PixelAlpha;
@@ -1504,6 +1519,19 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
                     return Blit8888to8888PixelAlphaSwizzleLSX;
                 }
 #endif
+#if defined(SDL_SVE2_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
+                // NEON is faster than SVE2 at a 128-bit vector size, so require wider vectors if NEON is built in
+#if defined(SDL_NEON_INTRINSICS)
+                if (SDL_HasSVE2() && SDL_GetSVEVectorSize() > 128) {
+#else
+                if (SDL_HasSVE2()) {
+#endif
+                    // Reference the generic blitters to prevent "unused function" compiler warnings/errors
+                    (void)Blit8888to8888PixelAlpha;
+                    (void)Blit8888to8888PixelAlphaSwizzle;
+                    return Blit8888to8888PixelAlphaSwizzleSVE2;
+                }
+#endif
 #if defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
                 // To prevent "unused function" compiler warnings/errors
                 (void)Blit8888to8888PixelAlpha;
diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c
index 204c1addbd..b014d4233a 100644
--- a/src/video/SDL_blit_N.c
+++ b/src/video/SDL_blit_N.c
@@ -26,6 +26,10 @@
 #include "SDL_surface_c.h"
 #include "SDL_blit_copy.h"
 
+#if defined(SDL_SVE2_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
+#include "./arm/SDL_sve2_blit_N.h"
+#endif
+
 // General optimized routines that write char by char
 #define HAVE_FAST_WRITE_INT8 1
 
@@ -3117,10 +3121,27 @@ SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface *surface)
                 return Blit8888to8888PixelSwizzleSSE41;
             }
 #endif
+#if defined(SDL_SVE2_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
+            if (SDL_HasSVE2()) {
+                return Blit8888to8888PixelSwizzleSVE2;
+            }
+#endif
 #if defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
             return Blit8888to8888PixelSwizzleNEON;
 #endif
         }
+#if defined(SDL_SVE2_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
+        if (SDL_HasSVE2()) {
+            /* RGBA8888/ARGB8888/XRGB8888 -> RGB565 */
+            if (srcfmt->bytes_per_pixel == 4 &&
+                dstfmt->bytes_per_pixel == 2 &&
+                dstfmt->Rmask == 0x0000F800 &&
+                dstfmt->Gmask == 0x000007E0 &&
+                dstfmt->Bmask == 0x0000001F) {
+                return Blit8888to565PixelSwizzleSVE2;
+            }
+        }
+#endif
 
         blitfun = NULL;
         if (dstfmt->bits_per_pixel > 8) {
diff --git a/src/video/arm/SDL_sve2_blit_A.c b/src/video/arm/SDL_sve2_blit_A.c
new file mode 100644
index 0000000000..be029bcc70
--- /dev/null
+++ b/src/video/arm/SDL_sve2_blit_A.c
@@ -0,0 +1,89 @@
+/*
+  Simple DirectMedia Layer
+  Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SDL_sve2_blit_A.h"
+#include <assert.h>
+
+#ifdef SDL_SVE2_INTRINSICS
+
+#undef sdl_sve_rgb32_blend_op_fill_alpha
+#define sdl_sve_rgb32_blend_op_fill_alpha(ma_alpha_chn_idx)              \
+    if (sve_src_chn_idx == (ma_alpha_chn_idx)) {                         \
+        /* fill alpha */                                                 \
+        sve_target_u16 = svdup_u16(0xFF);                                \
+    } else {                                                             \
+        svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx)); \
+        sve_target_u16 = sdl_sve_chn_blend_with_mask(sve_source_u16,     \
+                                                     sve_target_u16,     \
+                                                     vMask);             \
+    }
+
+#undef sdl_sve_rgb32_blend_op_copy_alpha
+#define sdl_sve_rgb32_blend_op_copy_alpha(ma_alpha_chn_idx)              \
+    if (sve_src_chn_idx == (ma_alpha_chn_idx)) {                         \
+        svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx)); \
+        sve_target_u16 = sdl_sve_chn_blend_with_mask(svdup_u16(0xFF),    \
+                                                     sve_target_u16,     \
+                                                     vMask);             \
+    } else {                                                             \
+        svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx)); \
+        sve_target_u16 = sdl_sve_chn_blend_with_mask(sve_source_u16,     \
+                                                     sve_target_u16,     \
+                                                     vMask);             \
+    }
+
+#undef sdl_sve_rgb32_blend_to_rgb565_op
+#define sdl_sve_rgb32_blend_to_rgb565_op(ma_alpha_chn_idx)               \
+    do {                                                                 \
+        svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx)); \
+        sve_target_u16 = sdl_sve_chn_blend_with_mask(sve_source_u16,     \
+                                                     sve_target_u16,     \
+                                                     vMask);             \
+    } while (0)
+
+#include "SDL_sve2_swizzle.h"
+
+/*-----------------------------------------------------------------------------*
+ * Swizzle Blend with Alpha                                                    *
+ *-----------------------------------------------------------------------------*/
+SDL_TARGETING("arch=armv8-a+sve2")
+void Blit8888to8888PixelAlphaSwizzleSVE2(SDL_BlitInfo *info)
+{
+    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
+    assert(0 != srcfmt->Amask);
+    (void)srcfmt;
+
+    sdl_sve_8888_to_8888_swizzle_dispatcher(info);
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+void Blit8888to565PixelAlphaSwizzleSVE2(SDL_BlitInfo *info)
+{
+    sdl_sve_rgb32_to_rgb565_swizzle_dispatcher(info);
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+size_t SDL_GetSVEVectorSize(void)
+{
+    return svlen(svundef_u8()) * 8;
+}
+
+#endif /* SDL_SVE2_INTRINSICS */
\ No newline at end of file
diff --git a/src/video/arm/SDL_sve2_blit_A.h b/src/video/arm/SDL_sve2_blit_A.h
new file mode 100644
index 0000000000..2a7e2b8149
--- /dev/null
+++ b/src/video/arm/SDL_sve2_blit_A.h
@@ -0,0 +1,37 @@
+/*
+  Simple DirectMedia Layer
+  Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef SDL_sve2_blit_A_h_
+#define SDL_sve2_blit_A_h_
+
+#include "../../SDL_internal.h"
+#include "../SDL_blit.h"
+
+#ifdef SDL_SVE2_INTRINSICS
+
+void Blit8888to8888PixelAlphaSwizzleSVE2(SDL_BlitInfo *info);
+void Blit8888to565PixelAlphaSwizzleSVE2(SDL_BlitInfo *info);
+
+size_t SDL_GetSVEVectorSize(void);
+
+#endif /* SDL_SVE2_INTRINSICS */
+
+#endif /* SDL_sve2_blit_A_h_ */
\ No newline at end of file
diff --git a/src/video/arm/SDL_sve2_blit_N.c b/src/video/arm/SDL_sve2_blit_N.c
new file mode 100644
index 0000000000..c6ae97e53b
--- /dev/null
+++ b/src/video/arm/SDL_sve2_blit_N.c
@@ -0,0 +1,64 @@
+/*
+  Simple DirectMedia Layer
+  Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SDL_sve2_blit_N.h"
+#include <assert.h>
+
+#ifdef SDL_SVE2_INTRINSICS
+
+#undef sdl_sve_rgb32_blend_op_fill_alpha
+#define sdl_sve_rgb32_blend_op_fill_alpha(ma_alpha_chn_idx) \
+    do {                                                    \
+        if (sve_src_chn_idx == (ma_alpha_chn_idx)) {        \
+            /* fill alpha */                                \
+            sve_target_u16 = svdup_u16(0xFF);               \
+        } else {                                            \
+            sve_target_u16 = sve_source_u16;                \
+        }                                                   \
+    } while (0)
+
+#undef sdl_sve_rgb32_blend_op_copy_alpha
+#define sdl_sve_rgb32_blend_op_copy_alpha(ma_alpha_chn_idx) \
+    do {                                                    \
+        sve_target_u16 = sve_source_u16;                    \
+    } while (0)
+
+#undef sdl_sve_rgb32_blend_to_rgb565_op
+#define sdl_sve_rgb32_blend_to_rgb565_op(ma_alpha_chn_idx) \
+    do {                                                   \
+        sve_target_u16 = sve_source_u16;                   \
+    } while (0)
+
+#include "SDL_sve2_swizzle.h"
+
+SDL_TARGETING("arch=armv8-a+sve2")
+void Blit8888to8888PixelSwizzleSVE2(SDL_BlitInfo *info)
+{
+    sdl_sve_8888_to_8888_swizzle_dispatcher(info);
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+void Blit8888to565PixelSwizzleSVE2(SDL_BlitInfo *info)
+{
+    sdl_sve_rgb32_to_rgb565_swizzle_dispatcher(info);
+}
+
+#endif /* SDL_SVE2_INTRINSICS */
\ No newline at end of file
diff --git a/src/video/arm/SDL_sve2_blit_N.h b/src/video/arm/SDL_sve2_blit_N.h
new file mode 100644
index 0000000000..3868de0dbb
--- /dev/null
+++ b/src/video/arm/SDL_sve2_blit_N.h
@@ -0,0 +1,35 @@
+/*
+  Simple DirectMedia Layer
+  Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef SDL_sve2_blit_N_h_
+#define SDL_sve2_blit_N_h_
+
+#include "../../SDL_internal.h"
+#include "../SDL_blit.h"
+
+#ifdef SDL_SVE2_INTRINSICS
+
+void Blit8888to8888PixelSwizzleSVE2(SDL_BlitInfo *info);
+void Blit8888to565PixelSwizzleSVE2(SDL_BlitInfo *info);
+
+#endif /* SDL_SVE2_INTRINSICS */
+
+#endif /* SDL_sve2_blit_N_h_ */
\ No newline at end of file
diff --git a/src/video/arm/SDL_sve2_extension.h b/src/video/arm/SDL_sve2_extension.h
new file mode 100644
index 0000000000..2f5a74a12b
--- /dev/null
+++ b/src/video/arm/SDL_sve2_extension.h
@@ -0,0 +1,1142 @@
+/*
+  Simple DirectMedia Layer
+  Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+#if !defined(SDL_SVE2_EXTENSION_H) //&& (defined(__ARM_FEATURE_SVE2) && __ARM_FEATURE_SVE2)
+#define SDL_SVE2_EXTENSION_H
+
+#include "SDL_sve2_util.h"
+#include <arm_sve.h>
+#include <stdint.h>
+
+/*!
+ * \brief Wrapper for the GNU `__attribute__((nonnull(...)))` function attribute.
+ *
+ * The macro arguments are forwarded unchanged as the attribute's argument
+ * list. The definition is skipped when ARM_NONNULL is already defined, so an
+ * earlier definition takes precedence.
+ */
+#ifndef ARM_NONNULL
+#define ARM_NONNULL(...) __attribute__((nonnull(__VA_ARGS__)))
+#endif
+
+/*
+ * Number of lanes in one full SVE vector, by element width.
+ * The signed spellings are plain aliases of the unsigned ones: the lane
+ * count depends only on the element size, not on its signedness.
+ */
+#define svlenu8()  svcntb_pat(SV_ALL)
+#define svlens8()  svcntb_pat(SV_ALL)
+
+#define svlenu16() svcnth_pat(SV_ALL)
+#define svlens16() svcnth_pat(SV_ALL)
+
+#define svlenu32() svcntw_pat(SV_ALL)
+#define svlens32() svcntw_pat(SV_ALL)
+
+#define svlenu64() svcntd_pat(SV_ALL)
+#define svlens64() svcntd_pat(SV_ALL)
+
+/*!
+ * \brief Loop header that walks `ma_stride_size` units one SVE vector at a time.
+ *
+ * Expands to two nested `for` statements and must be followed by the loop
+ * body. The outer `for` runs exactly once; it exists only to declare
+ * `ma_pred_name` (an svbool_t) in a scope that encloses the inner loop.
+ * The inner loop counts the index SVE_SAFE_NAME(n) up from 0 in steps of
+ * `sve_iteration_advance` (= svlenu32() * 4). Before every pass it recomputes
+ * `ma_pred_name` as svwhilelt_b8(index, ma_stride_size) and keeps going while
+ * the index is below `ma_stride_size`.
+ *
+ * Notes:
+ *  - `ma_stride_size` is expanded more than once; pass a side-effect-free
+ *    expression.
+ *  - Both svwhilelt_b8 operands are cast to int32_t, so the predicate is only
+ *    meaningful for sizes that fit in int32_t.
+ *  - The loop condition is a GNU statement expression `({ ... })`.
+ *  - `pTemp` and `sve_iteration_advance` are declared by the macro and are
+ *    visible inside the body.
+ *  - NOTE(review): the byte-granular predicate together with a step of
+ *    svlenu32() * 4 suggests `ma_stride_size` is a byte count -- confirm
+ *    against the callers.
+ */
+#define sdl_sve_stride_loop_accc8888(ma_stride_size, ma_pred_name)       \
+    for (svbool_t ma_pred_name, *pTemp = &ma_pred_name;                  \
+         pTemp != NULL;                                                  \
+         pTemp = NULL)                                                   \
+        for (size_t SVE_SAFE_NAME(n) = 0,                                \
+                    sve_iteration_advance = svlenu32() * 4;              \
+             ({                                                          \
+                 ma_pred_name = svwhilelt_b8((int32_t)SVE_SAFE_NAME(n),  \
+                                             (int32_t)(ma_stride_size)); \
+                 SVE_SAFE_NAME(n) < (ma_stride_size);                    \
+             });                                                         \
+             SVE_SAFE_NAME(n) += sve_iteration_advance)
+
+/*!
+ * \brief Alias of sdl_sve_stride_loop_accc8888 with the same two arguments.
+ */
+#define sdl_sve_stride_loop_rgb32(ma_stride_size, ma_pred_name) \
+    sdl_sve_stride_loop_accc8888(ma_stride_size, ma_pred_name)
+
+/*!
+ * \brief Loop header that walks `ma_stride_size` units using 16-bit lanes.
+ *
+ * Expands to two nested `for` statements and must be followed by the loop
+ * body. The outer `for` runs exactly once; it exists only to declare
+ * `ma_pred_name` (an svbool_t) in a scope that encloses the inner loop.
+ * The inner loop counts the index SVE_SAFE_NAME(n) up from 0 in steps of
+ * `sve_iteration_advance` (= svlenu16()). Before every pass it recomputes
+ * `ma_pred_name` as svwhilelt_b16(index, ma_stride_size) and keeps going
+ * while the index is below `ma_stride_size`.
+ *
+ * Notes:
+ *  - `ma_stride_size` is expanded more than once; pass a side-effect-free
+ *    expression.
+ *  - Both svwhilelt_b16 operands are cast to int32_t, so the predicate is
+ *    only meaningful for sizes that fit in int32_t.
+ *  - The loop condition is a GNU statement expression `({ ... })`.
+ *  - `pTemp` and `sve_iteration_advance` are declared by the macro and are
+ *    visible inside the body.
+ *  - NOTE(review): the 16-bit predicate together with a step of svlenu16()
+ *    suggests `ma_stride_size` counts 16-bit elements -- confirm against
+ *    the callers.
+ */
+#define sdl_sve_stride_loop_rgb16(ma_stride_size, ma_pred_name)           \
+    for (svbool_t ma_pred_name, *pTemp = &ma_pred_name;                   \
+         pTemp != NULL;                                                   \
+         pTemp = NULL)                                                    \
+        for (size_t SVE_SAFE_NAME(n) = 0,                                 \
+                    sve_iteration_advance = svlenu16();                   \
+             ({                                                           \
+                 ma_pred_name = svwhilelt_b16((int32_t)SVE_SAFE_NAME(n),  \
+                                              (int32_t)(ma_stride_size)); \
+                 SVE_SAFE_NAME(n) < (ma_stride_size);                     \
+             });                                                          \
+             SVE_SAFE_NAME(n) += sve_iteration_advance)
+
+/*!
+ * \brief Run a statement block once per channel of a 3-element u16 tuple.
+ *
+ * For channel index i = 0, 1, 2 (in that order) the macro opens a scope in
+ * which these locals are visible to the statements passed as `...`:
+ *   - sve_src_chn_idx : const uint8_t equal to i
+ *   - sve_source_u16  : svget3(ma_source_u16x3, i)
+ *   - sve_target_u16  : svget3(ma_target_u16x3, i); after the statements run
+ *                       it is stored back into channel i of ma_target_u16x3
+ *                       with svset3
+ * A copy of the source tuple taken on entry is visible as sve_source_u16x3.
+ *
+ * ma_target_u16x3 must be an assignable lvalue. Both tuple arguments are
+ * expanded several times, so they should be free of side effects.
+ */
+#define sdl_sve_pixel_ccc_foreach_chn(ma_source_u16x3,                    \
+                                      ma_target_u16x3,                    \
+                                      ...)                                \
+    do {                                                                  \
+        svuint16x3_t sve_source_u16x3 = ma_source_u16x3;                  \
+        (void)sve_source_u16x3;                                           \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 0;                            \
+            (void)sve_src_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget3((ma_source_u16x3), 0);     \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 0);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 0, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 1;                            \
+            (void)sve_src_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget3((ma_source_u16x3), 1);     \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 1);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 1, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 2;                            \
+            (void)sve_src_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget3((ma_source_u16x3), 2);     \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 2);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 2, sve_target_u16); \
+        } while (0);                                                      \
+    } while (0)
+
+/*!
+ * \brief Run a statement block for channels 0, 1 and 2 of a 4-element u16 tuple.
+ *
+ * Channel 3 is neither read nor written by the macro itself. For channel
+ * index i = 0, 1, 2 (in that order) the macro opens a scope in which these
+ * locals are visible to the statements passed as `...`:
+ *   - sve_src_chn_idx : const uint8_t equal to i
+ *   - sve_source_u16  : svget4(ma_source_u16x4, i)
+ *   - sve_target_u16  : svget4(ma_target_u16x4, i); after the statements run
+ *                       it is stored back into channel i of ma_target_u16x4
+ *                       with svset4
+ * A copy of the source tuple taken on entry is visible as sve_source_u16x4.
+ *
+ * ma_target_u16x4 must be an assignable lvalue. Both tuple arguments are
+ * expanded several times, so they should be free of side effects.
+ */
+#define sdl_sve_pixel_accc_foreach_chn012(ma_source_u16x4,                \
+                                          ma_target_u16x4,                \
+                                          ...)                            \
+    do {                                                                  \
+        svuint16x4_t sve_source_u16x4 = ma_source_u16x4;                  \
+        (void)sve_source_u16x4;                                           \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 0;                            \
+            (void)sve_src_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 0);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 0);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 0, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 1;                            \
+            (void)sve_src_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 1);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 1);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 1, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 2;                            \
+            (void)sve_src_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 2);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 2);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 2, sve_target_u16); \
+        } while (0);                                                      \
+    } while (0)
+
+/*!
+ * \brief Run a statement block once per channel of a 4-element u16 tuple.
+ *
+ * For channel index i = 0, 1, 2, 3 (in that order) the macro opens a scope in
+ * which these locals are visible to the statements passed as `...`:
+ *   - sve_src_chn_idx : const uint8_t equal to i
+ *   - sve_dst_chn_idx : const uint8_t equal to i (source and target channel
+ *                       coincide in this macro)
+ *   - sve_source_u16  : svget4(ma_source_u16x4, i)
+ *   - sve_target_u16  : svget4(ma_target_u16x4, i); after the statements run
+ *                       it is stored back into channel i of ma_target_u16x4
+ *                       with svset4
+ * A copy of the source tuple taken on entry is visible as sve_source_u16x4.
+ *
+ * sve_dst_chn_idx is declared here, as the sibling macros that reference it
+ * do. Previously each scope evaluated `(void)sve_dst_chn_idx;` without a
+ * declaration, so the macro only compiled when the caller happened to have
+ * that name in scope.
+ *
+ * ma_target_u16x4 must be an assignable lvalue. Both tuple arguments are
+ * expanded several times, so they should be free of side effects.
+ */
+#define sdl_sve_pixel_accc_foreach_chn(ma_source_u16x4,                   \
+                                       ma_target_u16x4,                   \
+                                       ...)                               \
+    do {                                                                  \
+        svuint16x4_t sve_source_u16x4 = ma_source_u16x4;                  \
+        (void)sve_source_u16x4;                                           \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 0;                            \
+            const uint8_t sve_dst_chn_idx = 0;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 0);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 0);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 0, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 1;                            \
+            const uint8_t sve_dst_chn_idx = 1;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 1);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 1);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 1, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 2;                            \
+            const uint8_t sve_dst_chn_idx = 2;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 2);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 2);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 2, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 3;                            \
+            const uint8_t sve_dst_chn_idx = 3;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 3);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 3);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 3, sve_target_u16); \
+        } while (0);                                                      \
+    } while (0)
+
+/*!
+ * \brief Run a statement block once per channel of a 4-element u16 tuple.
+ *
+ * For channel index i = 0, 1, 2, 3 (in that order) the macro opens a scope in
+ * which these locals are visible to the statements passed as `...`:
+ *   - sve_src_chn_idx : const uint8_t equal to i
+ *   - sve_source_u16  : svget4(ma_source_u16x4, i)
+ *   - sve_target_u16  : svget4(ma_target_u16x4, i); after the statements run
+ *                       it is stored back into channel i of ma_target_u16x4
+ *                       with svset4
+ * A copy of the source tuple taken on entry is visible as sve_source_u16x4.
+ *
+ * ma_target_u16x4 must be an assignable lvalue. Both tuple arguments are
+ * expanded several times, so they should be free of side effects.
+ */
+#define sdl_sve_pixel_u16x4_foreach_chn(ma_source_u16x4,                  \
+                                        ma_target_u16x4,                  \
+                                        ...)                              \
+    do {                                                                  \
+        svuint16x4_t sve_source_u16x4 = ma_source_u16x4;                  \
+        (void)sve_source_u16x4;                                           \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 0;                            \
+            (void)sve_src_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 0);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 0);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 0, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 1;                            \
+            (void)sve_src_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 1);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 1);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 1, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 2;                            \
+            (void)sve_src_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 2);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 2);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 2, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 3;                            \
+            (void)sve_src_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 3);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 3);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 3, sve_target_u16); \
+        } while (0);                                                      \
+    } while (0)
+
+/*!
+ * \brief Per-channel statement block with the target channel order reversed.
+ *
+ * Visits four source/target channel pairs in this order:
+ *   source 0 -> target 3, source 1 -> target 2,
+ *   source 2 -> target 1, source 3 -> target 0.
+ * For each pair (s, d) the macro opens a scope in which these locals are
+ * visible to the statements passed as `...`:
+ *   - sve_src_chn_idx : const uint8_t equal to s
+ *   - sve_dst_chn_idx : const uint8_t equal to d
+ *   - sve_source_u16  : svget4(ma_source_u16x4, s)
+ *   - sve_target_u16  : svget4(ma_target_u16x4, d); after the statements run
+ *                       it is stored back into channel d of ma_target_u16x4
+ *                       with svset4
+ * A copy of the source tuple taken on entry is visible as sve_source_u16x4.
+ *
+ * ma_target_u16x4 must be an assignable lvalue. Both tuple arguments are
+ * expanded several times, so they should be free of side effects. The source
+ * channels are read from ma_source_u16x4 itself, not from the entry copy, so
+ * passing the same variable for source and target lets earlier write-backs
+ * affect later reads.
+ */
+#define sdl_sve_pixel_u16x4_foreach_chn_src_dst_rev(ma_source_u16x4,      \
+                                                    ma_target_u16x4,      \
+                                                    ...)                  \
+    do {                                                                  \
+        svuint16x4_t sve_source_u16x4 = ma_source_u16x4;                  \
+        (void)sve_source_u16x4;                                           \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 0;                            \
+            const uint8_t sve_dst_chn_idx = 3;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 0);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 3);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 3, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 1;                            \
+            const uint8_t sve_dst_chn_idx = 2;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 1);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 2);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 2, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 2;                            \
+            const uint8_t sve_dst_chn_idx = 1;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 2);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 1);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 1, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 3;                            \
+            const uint8_t sve_dst_chn_idx = 0;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 3);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 0);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 0, sve_target_u16); \
+        } while (0);                                                      \
+    } while (0)
+
+/*!
+ * \brief Per-channel statement block that rotates channels up by one position.
+ *
+ * Visits four source/target channel pairs in this order:
+ *   source 0 -> target 1, source 1 -> target 2,
+ *   source 2 -> target 3, source 3 -> target 0.
+ * For each pair (s, d) the macro opens a scope in which these locals are
+ * visible to the statements passed as `...`:
+ *   - sve_src_chn_idx : const uint8_t equal to s
+ *   - sve_dst_chn_idx : const uint8_t equal to d
+ *   - sve_source_u16  : svget4(ma_source_u16x4, s)
+ *   - sve_target_u16  : svget4(ma_target_u16x4, d); after the statements run
+ *                       it is stored back into channel d of ma_target_u16x4
+ *                       with svset4
+ * A copy of the source tuple taken on entry is visible as sve_source_u16x4.
+ *
+ * NOTE(review): the name suggests an ACCC -> CCCA layout conversion with the
+ * alpha channel at tuple index 3 of the source -- confirm against callers.
+ *
+ * ma_target_u16x4 must be an assignable lvalue. Both tuple arguments are
+ * expanded several times, so they should be free of side effects. The source
+ * channels are read from ma_source_u16x4 itself, not from the entry copy, so
+ * passing the same variable for source and target lets earlier write-backs
+ * affect later reads.
+ */
+#define sdl_sve_pixel_u16x4_foreach_chn_accc_ccca(ma_source_u16x4,        \
+                                                  ma_target_u16x4,        \
+                                                  ...)                    \
+    do {                                                                  \
+        svuint16x4_t sve_source_u16x4 = ma_source_u16x4;                  \
+        (void)sve_source_u16x4;                                           \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 0;                            \
+            const uint8_t sve_dst_chn_idx = 1;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 0);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 1);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 1, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 1;                            \
+            const uint8_t sve_dst_chn_idx = 2;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 1);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 2);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 2, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 2;                            \
+            const uint8_t sve_dst_chn_idx = 3;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 2);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 3);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 3, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 3;                            \
+            const uint8_t sve_dst_chn_idx = 0;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 3);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 0);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 0, sve_target_u16); \
+        } while (0);                                                      \
+    } while (0)
+
+/*!
+ * \brief Per-channel statement block that rotates channels down by one position.
+ *
+ * Visits four source/target channel pairs in this order:
+ *   source 1 -> target 0, source 2 -> target 1,
+ *   source 3 -> target 2, source 0 -> target 3.
+ * For each pair (s, d) the macro opens a scope in which these locals are
+ * visible to the statements passed as `...`:
+ *   - sve_src_chn_idx : const uint8_t equal to s
+ *   - sve_dst_chn_idx : const uint8_t equal to d
+ *   - sve_source_u16  : svget4(ma_source_u16x4, s)
+ *   - sve_target_u16  : svget4(ma_target_u16x4, d); after the statements run
+ *                       it is stored back into channel d of ma_target_u16x4
+ *                       with svset4
+ * A copy of the source tuple taken on entry is visible as sve_source_u16x4.
+ *
+ * NOTE(review): the name suggests a CCCA -> ACCC layout conversion with the
+ * alpha channel at tuple index 0 of the source -- confirm against callers.
+ *
+ * ma_target_u16x4 must be an assignable lvalue. Both tuple arguments are
+ * expanded several times, so they should be free of side effects. The source
+ * channels are read from ma_source_u16x4 itself, not from the entry copy, so
+ * passing the same variable for source and target lets earlier write-backs
+ * affect later reads.
+ */
+#define sdl_sve_pixel_u16x4_foreach_chn_ccca_accc(ma_source_u16x4,        \
+                                                  ma_target_u16x4,        \
+                                                  ...)                    \
+    do {                                                                  \
+        svuint16x4_t sve_source_u16x4 = ma_source_u16x4;                  \
+        (void)sve_source_u16x4;                                           \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 1;                            \
+            const uint8_t sve_dst_chn_idx = 0;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 1);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 0);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 0, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 2;                            \
+            const uint8_t sve_dst_chn_idx = 1;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 2);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 1);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 1, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 3;                            \
+            const uint8_t sve_dst_chn_idx = 2;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 3);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 2);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 2, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 0;                            \
+            const uint8_t sve_dst_chn_idx = 3;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 0);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 3);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 3, sve_target_u16); \
+        } while (0);                                                      \
+    } while (0)
+
+/*!
+ * \brief Per-channel statement block that swaps channels 0 and 2.
+ *
+ * Visits four source/target channel pairs in this order:
+ *   source 0 -> target 2, source 1 -> target 1,
+ *   source 2 -> target 0, source 3 -> target 3.
+ * For each pair (s, d) the macro opens a scope in which these locals are
+ * visible to the statements passed as `...`:
+ *   - sve_src_chn_idx : const uint8_t equal to s
+ *   - sve_dst_chn_idx : const uint8_t equal to d
+ *   - sve_source_u16  : svget4(ma_source_u16x4, s)
+ *   - sve_target_u16  : svget4(ma_target_u16x4, d); after the statements run
+ *                       it is stored back into channel d of ma_target_u16x4
+ *                       with svset4
+ * A copy of the source tuple taken on entry is visible as sve_source_u16x4.
+ *
+ * NOTE(review): the name suggests an A123 -> A321 conversion in which the
+ * alpha channel (tuple index 3) keeps its position -- confirm against callers.
+ *
+ * ma_target_u16x4 must be an assignable lvalue. Both tuple arguments are
+ * expanded several times, so they should be free of side effects. The source
+ * channels are read from ma_source_u16x4 itself, not from the entry copy, so
+ * passing the same variable for source and target lets earlier write-backs
+ * affect later reads.
+ */
+#define sdl_sve_pixel_u16x4_foreach_chn_a123_a321(ma_source_u16x4,        \
+                                                  ma_target_u16x4,        \
+                                                  ...)                    \
+    do {                                                                  \
+        svuint16x4_t sve_source_u16x4 = ma_source_u16x4;                  \
+        (void)sve_source_u16x4;                                           \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 0;                            \
+            const uint8_t sve_dst_chn_idx = 2;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 0);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 2);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 2, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 1;                            \
+            const uint8_t sve_dst_chn_idx = 1;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 1);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 1);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 1, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 2;                            \
+            const uint8_t sve_dst_chn_idx = 0;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 2);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 0);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 0, sve_target_u16); \
+        } while (0);                                                      \
+        do {                                                              \
+            const uint8_t sve_src_chn_idx = 3;                            \
+            const uint8_t sve_dst_chn_idx = 3;                            \
+            (void)sve_src_chn_idx;                                        \
+            (void)sve_dst_chn_idx;                                        \
+            svuint16_t sve_source_u16 = svget4((ma_source_u16x4), 3);     \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 3);     \
+            (void)sve_source_u16;                                         \
+            (void)sve_target_u16;                                         \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 3, sve_target_u16); \
+        } while (0);                                                      \
+    } while (0)
+
+/*!
+ * \brief Expand the statement list __VA_ARGS__ once per channel pair, pairing
+ *        source channels 0,1,2,3 with target channels 0,3,2,1.
+ * \param ma_source_u16x4 svuint16x4_t expression; evaluated exactly once
+ * \param ma_target_u16x4 svuint16x4_t lvalue; rewritten after each channel
+ * \note  __VA_ARGS__ may use sve_source_u16x4, sve_source_u16, sve_src_chn_idx,
+ *        sve_dst_chn_idx and sve_target_u16 (stored back into the target).
+ *        Source reads come from a snapshot, so both arguments may be the same.
+ */
+#define sdl_sve_pixel_u16x4_foreach_chn_123a_321a(ma_source_u16x4,        \
+                                                  ma_target_u16x4,        \
+                                                  ...)                    \
+    do {                                                                  \
+        svuint16x4_t sve_source_u16x4 = (ma_source_u16x4);                \
+        do { /* source channel 0 -> target channel 0 */                   \
+            const uint8_t sve_src_chn_idx = 0;                            \
+            const uint8_t sve_dst_chn_idx = 0;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 0);      \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 0);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 0, sve_target_u16); \
+        } while (0);                                                      \
+        do { /* source channel 1 -> target channel 3 */                   \
+            const uint8_t sve_src_chn_idx = 1;                            \
+            const uint8_t sve_dst_chn_idx = 3;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 1);      \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 3);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 3, sve_target_u16); \
+        } while (0);                                                      \
+        do { /* source channel 2 -> target channel 2 */                   \
+            const uint8_t sve_src_chn_idx = 2;                            \
+            const uint8_t sve_dst_chn_idx = 2;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 2);      \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 2);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 2, sve_target_u16); \
+        } while (0);                                                      \
+        do { /* source channel 3 -> target channel 1 */                   \
+            const uint8_t sve_src_chn_idx = 3;                            \
+            const uint8_t sve_dst_chn_idx = 1;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 3);      \
+            svuint16_t sve_target_u16 = svget4((ma_target_u16x4), 1);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x4 = svset4(ma_target_u16x4, 1, sve_target_u16); \
+        } while (0);                                                      \
+    } while (0)
+
+/*!
+ * \brief Run __VA_ARGS__ per channel: source 0,1,2 -> target 0,1,2 (3 skipped).
+ * \param ma_source_u16x4 svuint16x4_t expression; evaluated exactly once
+ * \param ma_target_u16x3 svuint16x3_t lvalue; rewritten after each channel
+ * \note  __VA_ARGS__ may use sve_source_u16x4, sve_source_u16, sve_src_chn_idx,
+ *        sve_dst_chn_idx and sve_target_u16 (stored back into the target).
+ */
+#define sdl_sve_pixel_u16x4_foreach_chn_argb_rgb565(ma_source_u16x4,      \
+                                                    ma_target_u16x3,      \
+                                                    ...)                  \
+    do {                                                                  \
+        svuint16x4_t sve_source_u16x4 = (ma_source_u16x4);                \
+        do { /* source channel 0 -> target channel 0 */                   \
+            const uint8_t sve_src_chn_idx = 0;                            \
+            const uint8_t sve_dst_chn_idx = 0;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 0);      \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 0);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 0, sve_target_u16); \
+        } while (0);                                                      \
+        do { /* source channel 1 -> target channel 1 */                   \
+            const uint8_t sve_src_chn_idx = 1;                            \
+            const uint8_t sve_dst_chn_idx = 1;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 1);      \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 1);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 1, sve_target_u16); \
+        } while (0);                                                      \
+        do { /* source channel 2 -> target channel 2 */                   \
+            const uint8_t sve_src_chn_idx = 2;                            \
+            const uint8_t sve_dst_chn_idx = 2;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 2);      \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 2);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 2, sve_target_u16); \
+        } while (0);                                                      \
+    } while (0)
+
+/*!
+ * \brief Run __VA_ARGS__ per channel: source 1,2,3 -> target 0,1,2 (0 skipped).
+ * \param ma_source_u16x4 svuint16x4_t expression; evaluated exactly once
+ * \param ma_target_u16x3 svuint16x3_t lvalue; rewritten after each channel
+ * \note  __VA_ARGS__ may use sve_source_u16x4, sve_source_u16, sve_src_chn_idx,
+ *        sve_dst_chn_idx and sve_target_u16 (stored back into the target).
+ */
+#define sdl_sve_pixel_u16x4_foreach_chn_rgba_rgb565(ma_source_u16x4,      \
+                                                    ma_target_u16x3,      \
+                                                    ...)                  \
+    do {                                                                  \
+        svuint16x4_t sve_source_u16x4 = (ma_source_u16x4);                \
+        do { /* source channel 1 -> target channel 0 */                   \
+            const uint8_t sve_src_chn_idx = 1;                            \
+            const uint8_t sve_dst_chn_idx = 0;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 1);      \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 0);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 0, sve_target_u16); \
+        } while (0);                                                      \
+        do { /* source channel 2 -> target channel 1 */                   \
+            const uint8_t sve_src_chn_idx = 2;                            \
+            const uint8_t sve_dst_chn_idx = 1;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 2);      \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 1);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 1, sve_target_u16); \
+        } while (0);                                                      \
+        do { /* source channel 3 -> target channel 2 */                   \
+            const uint8_t sve_src_chn_idx = 3;                            \
+            const uint8_t sve_dst_chn_idx = 2;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 3);      \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 2);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 2, sve_target_u16); \
+        } while (0);                                                      \
+    } while (0)
+
+/*!
+ * \brief Run __VA_ARGS__ per channel: source 3,2,1 -> target 0,1,2 (0 skipped).
+ * \param ma_source_u16x4 svuint16x4_t expression; evaluated exactly once
+ * \param ma_target_u16x3 svuint16x3_t lvalue; rewritten after each channel
+ * \note  __VA_ARGS__ may use sve_source_u16x4, sve_source_u16, sve_src_chn_idx,
+ *        sve_dst_chn_idx and sve_target_u16 (stored back into the target).
+ */
+#define sdl_sve_pixel_u16x4_foreach_chn_bgra_rgb565(ma_source_u16x4,      \
+                                                    ma_target_u16x3,      \
+                                                    ...)                  \
+    do {                                                                  \
+        svuint16x4_t sve_source_u16x4 = (ma_source_u16x4);                \
+        do { /* source channel 3 -> target channel 0 */                   \
+            const uint8_t sve_src_chn_idx = 3;                            \
+            const uint8_t sve_dst_chn_idx = 0;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 3);      \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 0);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 0, sve_target_u16); \
+        } while (0);                                                      \
+        do { /* source channel 2 -> target channel 1 */                   \
+            const uint8_t sve_src_chn_idx = 2;                            \
+            const uint8_t sve_dst_chn_idx = 1;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 2);      \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 1);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 1, sve_target_u16); \
+        } while (0);                                                      \
+        do { /* source channel 1 -> target channel 2 */                   \
+            const uint8_t sve_src_chn_idx = 1;                            \
+            const uint8_t sve_dst_chn_idx = 2;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 1);      \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 2);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 2, sve_target_u16); \
+        } while (0);                                                      \
+    } while (0)
+
+/*!
+ * \brief Run __VA_ARGS__ per channel: source 2,1,0 -> target 0,1,2 (3 skipped).
+ * \param ma_source_u16x4 svuint16x4_t expression; evaluated exactly once
+ * \param ma_target_u16x3 svuint16x3_t lvalue; rewritten after each channel
+ * \note  __VA_ARGS__ may use sve_source_u16x4, sve_source_u16, sve_src_chn_idx,
+ *        sve_dst_chn_idx and sve_target_u16 (stored back into the target).
+ */
+#define sdl_sve_pixel_u16x4_foreach_chn_abgr_rgb565(ma_source_u16x4,      \
+                                                    ma_target_u16x3,      \
+                                                    ...)                  \
+    do {                                                                  \
+        svuint16x4_t sve_source_u16x4 = (ma_source_u16x4);                \
+        do { /* source channel 2 -> target channel 0 */                   \
+            const uint8_t sve_src_chn_idx = 2;                            \
+            const uint8_t sve_dst_chn_idx = 0;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 2);      \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 0);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 0, sve_target_u16); \
+        } while (0);                                                      \
+        do { /* source channel 1 -> target channel 1 */                   \
+            const uint8_t sve_src_chn_idx = 1;                            \
+            const uint8_t sve_dst_chn_idx = 1;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 1);      \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 1);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 1, sve_target_u16); \
+        } while (0);                                                      \
+        do { /* source channel 0 -> target channel 2 */                   \
+            const uint8_t sve_src_chn_idx = 0;                            \
+            const uint8_t sve_dst_chn_idx = 2;                            \
+            svuint16_t sve_source_u16 = svget4(sve_source_u16x4, 0);      \
+            svuint16_t sve_target_u16 = svget3((ma_target_u16x3), 2);     \
+            (void)sve_src_chn_idx; (void)sve_dst_chn_idx;                 \
+            (void)sve_source_u16; (void)sve_target_u16;                   \
+            __VA_ARGS__                                                   \
+            ma_target_u16x3 = svset3(ma_target_u16x3, 2, sve_target_u16); \
+        } while (0);                                                      \
+    } while (0)
+
+/*! \brief Split 5-6-5 pixels into {bits 0-4 << 3, bits 5-10 >> 3, bits 11-15 >> 8}. */
+SDL_TARGETING("arch=armv8-a+sve2")
+static inline svuint16x3_t sdl_sve_rgb565_unpack(svuint16_t vPixels)
+{
+    svbool_t vAll = svptrue_b16();
+    svuint16_t vBlue8 = svlsl_n_u16_m(vAll, svand_n_u16_m(vAll, vPixels, 0x1F), 3);
+    svuint16_t vGreen8 = svlsr_n_u16_m(vAll, svand_n_u16_m(vAll, vPixels, (0x3F << 5)), 3);
+    svuint16_t vRed8 = svlsr_n_u16_m(vAll, svand_n_u16_m(vAll, vPixels, (0x1F << 11)), 8);
+
+    return svcreate3_u16(vBlue8, vGreen8, vRed8);
+}
+
+/*!
+ * \brief Fold a three-channel tuple into 16-bit pixels, per lane:
+ *        (ch0 >> 3) | ((ch1 & (0x3F << 2)) << 3) | ((ch2 & (0x1F << 3)) << 8)
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+static inline svuint16_t sdl_sve_rgb565_pack(svuint16x3_t vRGB16x3)
+{
+    svbool_t vAll = svptrue_b16();
+
+    /* element 0: shifted down into the lowest bits (no mask is applied) */
+    svuint16_t vLowField = svlsr_n_u16_m(vAll, svget3_u16(vRGB16x3, 0), 3);
+
+    /* element 1: keep bits 2-7, then move them up to bits 5-10 */
+    svuint16_t vMidField = svand_n_u16_m(vAll, svget3_u16(vRGB16x3, 1), (0x3F << 2));
+    vMidField = svlsl_n_u16_m(vAll, vMidField, 3);
+
+    /* element 2: keep bits 3-7, then move them up to bits 11-15 */
+    svuint16_t vHighField = svand_n_u16_m(vAll, svget3_u16(vRGB16x3, 2), (0x1F << 3));
+    vHighField = svlsl_n_u16_m(vAll, vHighField, 8);
+
+    return svorr_u16_m(vAll, svorr_u16_m(vAll, vLowField, vMidField), vHighField);
+}
+
+/*!
+ * \brief Load 16-bit pixels with svld2_u8 and split them via sdl_sve_rgb565_unpack().
+ * \param vPredu8   predicate passed to svld2_u8
+ * \param phwSource first pixel to read
+ * \param pvLow     out: channels of the pixels widened with svunpklo_u16
+ * \param pvHigh    out: channels of the pixels widened with svunpkhi_u16
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(2, 3, 4)
+static inline void svld3rgb565_u16(svbool_t vPredu8,
+                                   uint16_t *phwSource,
+                                   svuint16x3_t *pvLow,
+                                   svuint16x3_t *pvHigh)
+{
+    svbool_t vAll = svptrue_b16();
+
+    /* two-way de-interleave: element 0 = first byte of each pixel, element 1 = second */
+    svuint8x2_t vBytePlanes = svld2_u8(vPredu8, (uint8_t *)phwSource);
+    svuint8_t vFirstBytes = svget2_u8(vBytePlanes, 0);
+    svuint8_t vSecondBytes = svget2_u8(vBytePlanes, 1);
+
+    /* pixel = first byte | (second byte << 8), rebuilt separately per vector half */
+    svuint16_t vPixelsLow = svlsl_n_u16_m(vAll, svunpklo_u16(vSecondBytes), 8);
+    svuint16_t vPixelsHigh = svlsl_n_u16_m(vAll, svunpkhi_u16(vSecondBytes), 8);
+    vPixelsLow = svorr_u16_m(vAll, svunpklo_u16(vFirstBytes), vPixelsLow);
+    vPixelsHigh = svorr_u16_m(vAll, svunpkhi_u16(vFirstBytes), vPixelsHigh);
+
+    /* split each half into its three channels */
+    *pvLow = sdl_sve_rgb565_unpack(vPixelsLow);
+    *pvHigh = sdl_sve_rgb565_unpack(vPixelsHigh);
+}
+
+/*!
+ * \brief Pack two channel tuples with sdl_sve_rgb565_pack() and store the
+ *        resulting 16-bit pixels through svst2_u8.
+ * \param vPredu8   predicate passed to svst2_u8
+ * \param phwTarget first pixel to write
+ * \param vLow      channels packed into the first svuzp1_u8 operand
+ * \param vHigh     channels packed into the second svuzp1_u8 operand
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(2)
+static inline void svst3rgb565_u16(svbool_t vPredu8,
+                                   uint16_t *phwTarget,
+                                   svuint16x3_t vLow,
+                                   svuint16x3_t vHigh)
+{
+    svbool_t vAll = svptrue_b16();
+
+    /* one 16-bit pixel per lane for each half */
+    svuint16_t vPixelsLow = sdl_sve_rgb565_pack(vLow);
+    svuint16_t vPixelsHigh = sdl_sve_rgb565_pack(vHigh);
+
+    /* first byte of every pixel: pixel & 0xFF */
+    svuint16_t vFirstByteLow = svand_n_u16_m(vAll, vPixelsLow, 0xFF);
+    svuint16_t vFirstByteHigh = svand_n_u16_m(vAll, vPixelsHigh, 0xFF);
+
+    /* second byte of every pixel: pixel >> 8 */
+    svuint16_t vSecondByteLow = svlsr_n_u16_m(vAll, vPixelsLow, 8);
+    svuint16_t vSecondByteHigh = svlsr_n_u16_m(vAll, vPixelsHigh, 8);
+
+    /*
+     * Viewed as bytes, svuzp1 keeps the even-indexed bytes of both operands,
+     * which narrows the low/high halves into a single byte vector
+     * (the bytes it drops are the upper halves cleared by the mask/shift above).
+     */
+    svuint8_t vFirstBytes = svuzp1_u8(svreinterpret_u8(vFirstByteLow),
+                                      svreinterpret_u8(vFirstByteHigh));
+    svuint8_t vSecondBytes = svuzp1_u8(svreinterpret_u8(vSecondByteLow),
+                                       svreinterpret_u8(vSecondByteHigh));
+
+    /* interleaved store: first byte, second byte, first byte, ... */
+    svst2_u8(vPredu8,
+             (uint8_t *)phwTarget,
+             svcreate2_u8(vFirstBytes, vSecondBytes));
+}
+
+#if defined(__GNUC__) && !defined(__clang__)
+/*! \brief GCC (non-clang) form of svld4ub_u16: one svld4_u8 load, then the four byte
+ *         channels are widened into *low (svunpklo) and *high (svunpkhi); old values unread. */
+#define svld4ub_u16(ma_pred,                                                              \
+                    ma_src_ptr,                                                           \
+                    ma_svuint16x4_low_ptr,                                                \
+                    ma_svuint16x4_high_ptr)                                               \
+    do {                                                                                  \
+        svuint8x4_t vInput8x4 = svld4_u8((ma_pred), (ma_src_ptr));                        \
+        *(ma_svuint16x4_low_ptr) = svcreate4_u16(svunpklo_u16(svget4_u8(vInput8x4, 0)),   \
+                                                 svunpklo_u16(svget4_u8(vInput8x4, 1)),   \
+                                                 svunpklo_u16(svget4_u8(vInput8x4, 2)),   \
+                                                 svunpklo_u16(svget4_u8(vInput8x4, 3)));  \
+        *(ma_svuint16x4_high_ptr) = svcreate4_u16(svunpkhi_u16(svget4_u8(vInput8x4, 0)),  \
+                                                  svunpkhi_u16(svget4_u8(vInput8x4, 1)),  \
+                                                  svunpkhi_u16(svget4_u8(vInput8x4, 2)),  \
+                                                  svunpkhi_u16(svget4_u8(vInput8x4, 3))); \
+    } while (0)
+
+/*! \brief GCC (non-clang) form of svst4ub_u16: narrows each 16-bit channel of the
+ *         low/high tuples to bytes with svuzp1_u8 and stores all four with svst4_u8. */
+#define svst4ub_u16(ma_pred,                                                             \
+                    ma_dst_ptr,                                                          \
+                    ma_svuint16x4_low,                                                   \
+                    ma_svuint16x4_high)                                                  \
+    do {                                                                                 \
+        svst4_u8((ma_pred),                                                              \
+                 (ma_dst_ptr),                                                           \
+                 svcreate4_u8(                                                           \
+                     svuzp1_u8(svreinterpret_u8(svget4_u16((ma_svuint16x4_low), 0)),     \
+                               svreinterpret_u8(svget4_u16((ma_svuint16x4_high), 0))),   \
+                     svuzp1_u8(svreinterpret_u8(svget4_u16((ma_svuint16x4_low), 1)),     \
+                               svreinterpret_u8(svget4_u16((ma_svuint16x4_high), 1))),   \
+                     svuzp1_u8(svreinterpret_u8(svget4_u16((ma_svuint16x4_low), 2)),     \
+                               svreinterpret_u8(svget4_u16((ma_svuint16x4_high), 2))),   \
+                     svuzp1_u8(svreinterpret_u8(svget4_u16((ma_svuint16x4_low), 3)),     \
+                               svreinterpret_u8(svget4_u16((ma_svuint16x4_high), 3))))); \
+    } while (0)
+#else
+/*!
+ * \brief Load four interleaved byte channels with svld4_u8 and widen each to 16 bit.
+ * \param pvLow   out: the four channels widened with svunpklo_u16
+ * \param pvHigh  out: the four channels widened with svunpkhi_u16
+ * \note  Both outputs are built with svcreate4_u16, so their old contents are never read.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(2, 3, 4)
+static inline void svld4ub_u16(svbool_t vPredu8,
+                               uint8_t *pchSource,
+                               svuint16x4_t *pvLow,
+                               svuint16x4_t *pvHigh)
+{
+    svuint8x4_t vInput8x4 = svld4_u8(vPredu8, pchSource);
+    *pvLow = svcreate4_u16(svunpklo_u16(svget4_u8(vInput8x4, 0)), svunpklo_u16(svget4_u8(vInput8x4, 1)),
+                           svunpklo_u16(svget4_u8(vInput8x4, 2)), svunpklo_u16(svget4_u8(vInput8x4, 3)));
+    *pvHigh = svcreate4_u16(svunpkhi_u16(svget4_u8(vInput8x4, 0)), svunpkhi_u16(svget4_u8(vInput8x4, 1)),
+                            svunpkhi_u16(svget4_u8(vInput8x4, 2)), svunpkhi_u16(svget4_u8(vInput8x4, 3)));
+}
+
+/*!
+ * \brief Narrow two 16-bit channel tuples to bytes and store them with svst4_u8.
+ * \param vPredu8    predicate passed to svst4_u8
+ * \param pchTarget  first byte to write
+ * \param vLow,vHigh per-channel first and second operand of svuzp1_u8
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(2)
+static inline void svst4ub_u16(svbool_t vPredu8,
+                               uint8_t *pchTarget,
+                               svuint16x4_t vLow,
+                               svuint16x4_t vHigh)
+{
+    /* per channel: svuzp1_u8 keeps the even-indexed bytes of the low and high lanes */
+    svuint8x4_t vPacked8x4 = svcreate4_u8(
+        svuzp1_u8(svreinterpret_u8(svget4_u16(vLow, 0)), svreinterpret_u8(svget4_u16(vHigh, 0))),
+        svuzp1_u8(svreinterpret_u8(svget4_u16(vLow, 1)), svreinterpret_u8(svget4_u16(vHigh, 1))),
+        svuzp1_u8(svreinterpret_u8(svget4_u16(vLow, 2)), svreinterpret_u8(svget4_u16(vHigh, 2))),
+        svuzp1_u8(svreinterpret_u8(svget4_u16(vLow, 3)), svreinterpret_u8(svget4_u16(vHigh, 3))));
+
+    svst4_u8(vPredu8, pchTarget, vPacked8x4);
+}
+#endif
+
+/*!
+ * \brief x = vSource * vMask + vTarget * (255 - vMask) + 1; result = (x + (x >> 8)) >> 8
+ * \note the Element range of vMask is [0, 0xFF]
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+static inline svuint16_t sdl_sve_chn_blend_with_mask(svuint16_t vSource,
+                                                     svuint16_t vTarget,
+                                                     svuint16_t vMask)
+{
+    svbool_t vAll = svptrue_b16();
+
+    svuint16_t vInverseMask = svsub_u16_m(vAll, svdup_u16(255), vMask);
+
+    /* x = vSource * vMask + vTarget * (255 - vMask) + 1 */
+    svuint16_t vAccum = svmul_u16_m(vAll, vSource, vMask);
+    vAccum = svmla_u16_m(vAll, vAccum, vTarget, vInverseMask);
+    vAccum = svadd_n_u16_m(vAll, vAccum, 1);
+
+    /* x += x >> 8 */
+    vAccum = svadd_u16_m(vAll, vAccum, svlsr_n_u16_m(vAll, vAccum, 8));
+
+    /* result = x >> 8 */
+    return svlsr_n_u16_m(vAll, vAccum, 8);
+}
+
+/*!
+ * \brief result = (vSource * hwOpacity + vTarget * (256 - hwOpacity)) >> 8, per lane
+ * \note the hwOpacity range [0, 0x100]
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+static inline svuint16_t sdl_sve_chn_blend_with_opacity(svuint16_t vSource,
+                                                        svuint16_t vTarget,
+                                                        uint16_t hwOpacity)
+{
+    svbool_t vAll = svptrue_b16();
+    uint16_t hwInverseOpacity = (uint16_t)(256 - hwOpacity);
+
+    svuint16_t vWeightedSource = svmul_n_u16_m(vAll, vSource, hwOpacity);
+    svuint16_t vWeightedTarget = svmul_n_u16_m(vAll, vTarget, hwInverseOpacity);
+    svuint16_t vSum = svadd_u16_m(vAll, vWeightedSource, vWeightedTarget);
+
+    return svlsr_n_u16_m(vAll, vSum, 8);
+}
+
+/*!
+ * \brief Scale the mask by the opacity, then blend, per lane:
+ *        m      = (vMask == 255) ? hwOpacity : (vMask * hwOpacity) >> 8
+ *        result = (vSource * m + vTarget * (256 - m)) >> 8
+ * \note the Element range of vMask is [0, 0xFF]
+ * \note the hwOpacity range [0, 0x100]
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+static inline svuint16_t sdl_sve_chn_blend_with_mask_and_opacity(svuint16_t vSource,
+                                                                 svuint16_t vTarget,
+                                                                 svuint16_t vMask,
+                                                                 uint16_t hwOpacity)
+{
+    svbool_t vAll = svptrue_b16();
+
+    /* lanes whose mask is exactly 255 take the opacity unscaled */
+    svbool_t vMaskIsFull = svcmpeq_n_u16(vAll, vMask, 255);
+    svuint16_t vScaledMask = svlsr_n_u16_m(vAll, svmul_n_u16_m(vAll, vMask, hwOpacity), 8);
+    svuint16_t vAlpha = svsel(vMaskIsFull, svdup_u16(hwOpacity), vScaledMask);
+
+    svuint16_t vInverseAlpha = svsub_u16_m(vAll, svdup_u16(256), vAlpha);
+    svuint16_t vSum = svadd_u16_m(vAll,
+                                  svmul_u16_m(vAll, vSource, vAlpha),
+                                  svmul_u16_m(vAll, vTarget, vInverseAlpha));
+
+    return svlsr_n_u16_m(vAll, vSum, 8);
+}
+
+/*!
+ * \brief Combine two masks, then blend, per lane:
+ *        m1     = (vMask1 == 255) ? 256 : vMask1
+ *        m      = (vMask0 >= 255) ? m1 : (vMask0 * m1) >> 8
+ *        result = (vSource * m + vTarget * (256 - m)) >> 8
+ * \note the Element range of vMask0/1 is [0, 0xFF]
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+static inline svuint16_t sdl_sve_chn_blend_with_masks(svuint16_t vSource,
+                                                      svuint16_t vTarget,
+                                                      svuint16_t vMask0,
+                                                      svuint16_t vMask1)
+{
+    svbool_t vAll = svptrue_b16();
+
+    /* merging add: only the lanes where vMask1 == 255 are incremented (to 256) */
+    svbool_t vMask1IsFull = svcmpeq_n_u16(vAll, vMask1, 255);
+    vMask1 = svadd_u16_m(vMask1IsFull, vMask1, svdup_u16(1));
+
+    /* lanes with vMask0 >= 255 take the adjusted vMask1 as is */
+    svbool_t vMask0IsFull = svcmpge_n_u16(vAll, vMask0, 255);
+    svuint16_t vProduct = svlsr_n_u16_m(vAll, svmul_u16_m(vAll, vMask0, vMask1), 8);
+    svuint16_t vAlpha = svsel(vMask0IsFull, vMask1, vProduct);
+
+    svuint16_t vInverseAlpha = svsub_u16_m(vAll, svdup_u16(256), vAlpha);
+    svuint16_t vSum = svadd_u16_m(vAll,
+                                  svmul_u16_m(vAll, vSource, vAlpha),
+                                  svmul_u16_m(vAll, vTarget, vInverseAlpha));
+
+    return svlsr_n_u16_m(vAll, vSum, 8);
+}
+
+/*! \brief Blend vSource over vTarget weighted by two masks and an opacity.
+ *  \note every element of vMask0 and vMask1 must lie in [0, 0xFF]
+ *  \note hwOpacity must lie in [0, 0x100]
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+static inline svuint16_t sdl_sve_chn_blend_with_masks_and_opacity(
+    svuint16_t vSource,
+    svuint16_t vTarget,
+    svuint16_t vMask0,
+    svuint16_t vMask1,
+    uint16_t hwOpacity)
+{
+    svbool_t vAll = svptrue_b16();
+
+    /* Promote fully opaque vMask0 lanes from 255 to 256. */
+    svbool_t vOpaque0 = svcmpeq_n_u16(vAll, vMask0, 255);
+    vMask0 = svadd_u16_m(vOpaque0, vMask0, svdup_u16(1));
+
+    /* Where vMask1 >= 255 keep vMask0, otherwise use
+     * (vMask0 * vMask1) >> 8. */
+    svuint16_t vProduct = svmul_u16_m(vAll, vMask0, vMask1);
+    vProduct = svlsr_n_u16_m(vAll, vProduct, 8);
+    svuint16_t vWeight = svsel(svcmpge_n_u16(vAll, vMask1, 255),
+                               vMask0,
+                               vProduct);
+
+    /* Where the combined weight is >= 255 substitute hwOpacity, otherwise
+     * scale it as (vWeight * hwOpacity) >> 8. */
+    svuint16_t vScaled = svmul_n_u16_m(vAll, vWeight, hwOpacity);
+    vScaled = svlsr_n_u16_m(vAll, vScaled, 8);
+    vWeight = svsel(svcmpge_n_u16(vAll, vWeight, 255),
+                    svdup_u16(hwOpacity),
+                    vScaled);
+
+    /* (vSource * vWeight + vTarget * (256 - vWeight)) >> 8 */
+    svuint16_t vInverse = svsub_u16_m(vAll, svdup_u16(256), vWeight);
+    svuint16_t vSum = svadd_u16_m(vAll,
+                                  svmul_u16_m(vAll, vSource, vWeight),
+                                  svmul_u16_m(vAll, vTarget, vInverse));
+
+    return svlsr_n_u16_m(vAll, vSum, 8);
+}
+
+/*! \brief Blend vSource over vTarget weighted by three masks.
+ *  \note elements of vMask0/1 (and presumably vMask2) lie in [0, 0xFF]
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+static inline svuint16_t sdl_sve_chn_blend_with_3masks(svuint16_t vSource,
+                                                       svuint16_t vTarget,
+                                                       svuint16_t vMask0,
+                                                       svuint16_t vMask1,
+                                                       svuint16_t vMask2)
+{
+    svbool_t vAll = svptrue_b16();
+
+    /* Promote fully opaque vMask0 lanes from 255 to 256. */
+    svbool_t vOpaque0 = svcmpeq_n_u16(vAll, vMask0, 255);
+    vMask0 = svadd_u16_m(vOpaque0, vMask0, svdup_u16(1));
+
+    /* Where vMask1 >= 255 keep vMask0, otherwise use
+     * (vMask0 * vMask1) >> 8. */
+    svuint16_t vProduct = svmul_u16_m(vAll, vMask0, vMask1);
+    vProduct = svlsr_n_u16_m(vAll, vProduct, 8);
+    svuint16_t vWeight = svsel(svcmpge_n_u16(vAll, vMask1, 255),
+                               vMask0,
+                               vProduct);
+
+    /* Where vMask2 >= 255 keep the weight so far, otherwise use
+     * (vWeight * vMask2) >> 8. */
+    vProduct = svmul_u16_m(vAll, vWeight, vMask2);
+    vProduct = svlsr_n_u16_m(vAll, vProduct, 8);
+    vWeight = svsel(svcmpge_n_u16(vAll, vMask2, 255),
+                    vWeight,
+                    vProduct);
+
+    /* (vSource * vWeight + vTarget * (256 - vWeight)) >> 8 */
+    svuint16_t vInverse = svsub_u16_m(vAll, svdup_u16(256), vWeight);
+    svuint16_t vSum = svadd_u16_m(vAll,
+                                  svmul_u16_m(vAll, vSource, vWeight),
+                                  svmul_u16_m(vAll, vTarget, vInverse));
+
+    return svlsr_n_u16_m(vAll, vSum, 8);
+}
+
+/*! \brief Blend vSource over vTarget weighted by three masks and an opacity.
+ *  \note elements of vMask0/1 (and presumably vMask2) lie in [0, 0xFF]
+ *  \note hwOpacity must lie in [0, 0x100]
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+static inline svuint16_t sdl_sve_chn_blend_with_3masks_and_opacity(
+    svuint16_t vSource,
+    svuint16_t vTarget,
+    svuint16_t vMask0,
+    svuint16_t vMask1,
+    svuint16_t vMask2,
+    uint16_t hwOpacity)
+{
+    svbool_t vAll = svptrue_b16();
+
+    /* Promote fully opaque vMask0 lanes from 255 to 256. */
+    svbool_t vOpaque0 = svcmpeq_n_u16(vAll, vMask0, 255);
+    vMask0 = svadd_u16_m(vOpaque0, vMask0, svdup_u16(1));
+
+    /* Where vMask1 >= 255 keep vMask0, otherwise use
+     * (vMask0 * vMask1) >> 8. */
+    svuint16_t vProduct = svmul_u16_m(vAll, vMask0, vMask1);
+    vProduct = svlsr_n_u16_m(vAll, vProduct, 8);
+    svuint16_t vWeight = svsel(svcmpge_n_u16(vAll, vMask1, 255),
+                               vMask0,
+                               vProduct);
+
+    /* Where vMask2 >= 255 keep the weight so far, otherwise use
+     * (vWeight * vMask2) >> 8. */
+    vProduct = svmul_u16_m(vAll, vWeight, vMask2);
+    vProduct = svlsr_n_u16_m(vAll, vProduct, 8);
+    vWeight = svsel(svcmpge_n_u16(vAll, vMask2, 255),
+                    vWeight,
+                    vProduct);
+
+    /* Where the combined weight is >= 255 substitute hwOpacity, otherwise
+     * scale it as (vWeight * hwOpacity) >> 8. */
+    svuint16_t vScaled = svmul_n_u16_m(vAll, vWeight, hwOpacity);
+    vScaled = svlsr_n_u16_m(vAll, vScaled, 8);
+    vWeight = svsel(svcmpge_n_u16(vAll, vWeight, 255),
+                    svdup_u16(hwOpacity),
+                    vScaled);
+
+    /* (vSource * vWeight + vTarget * (256 - vWeight)) >> 8 */
+    svuint16_t vInverse = svsub_u16_m(vAll, svdup_u16(256), vWeight);
+    svuint16_t vSum = svadd_u16_m(vAll,
+                                  svmul_u16_m(vAll, vSource, vWeight),
+                                  svmul_u16_m(vAll, vTarget, vInverse));
+
+    return svlsr_n_u16_m(vAll, vSum, 8);
+}
+
+#endif /* SDL_SVE2_EXTENSION_H */
\ No newline at end of file
diff --git a/src/video/arm/SDL_sve2_swizzle.h b/src/video/arm/SDL_sve2_swizzle.h
new file mode 100644
index 0000000000..a2d6f978d2
--- /dev/null
+++ b/src/video/arm/SDL_sve2_swizzle.h
@@ -0,0 +1,2375 @@
+/*
+  Simple DirectMedia Layer
+  Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+#if !defined(SDL_SVE2_SWIZZLE_H) /* include guard; name follows SDL_SVE2_EXTENSION_H */
+#define SDL_SVE2_SWIZZLE_H
+
+#include "SDL_sve2_extension.h"
+
+#define sdl_sve_rgb32_stride_impl(ma_sve_chn_iterator, ...)   \
+    sdl_sve_stride_loop_rgb32(uStride, vTailPred)             \
+    {                                                         \
+        /* Reads pwSource, pwTarget, uStride by name. */      \
+        svuint16x4_t vSourceLow16x4 = svundef4_u16();         \
+        svuint16x4_t vSourceHigh16x4 = svundef4_u16();        \
+        /* Target tuples mirror the source tuples. */         \
+        svuint16x4_t vTargetLow16x4 = svundef4_u16();         \
+        svuint16x4_t vTargetHigh16x4 = svundef4_u16();        \
+        /* Fill the source low/high tuples. */                \
+        svld4ub_u16(vTailPred,                                \
+                    (uint8_t *)pwSource,                      \
+                    &vSourceLow16x4,                          \
+                    &vSourceHigh16x4);                        \
+        /* Fill the target low/high tuples. */                \
+        svld4ub_u16(vTailPred,                                \
+                    (uint8_t *)pwTarget,                      \
+                    &vTargetLow16x4,                          \
+                    &vTargetHigh16x4);                        \
+                                                              \
+        /* process low half */                                \
+        ma_sve_chn_iterator(vSourceLow16x4, vTargetLow16x4,   \
+                            __VA_ARGS__);                     \
+                                                              \
+        /* process high half */                               \
+        ma_sve_chn_iterator(vSourceHigh16x4, vTargetHigh16x4, \
+                            __VA_ARGS__);                     \
+        /* Store both halves back to pwTarget. */             \
+        svst4ub_u16(vTailPred,                                \
+                    (uint8_t *)pwTarget,                      \
+                    vTargetLow16x4,                           \
+                    vTargetHigh16x4);                         \
+        /* Advance both cursors. */                           \
+        pwSource += sve_iteration_advance;                    \
+        pwTarget += sve_iteration_advance;                    \
+    }
+
+#define sdl_sve_rgb32_no_alpha_stride_impl(                   \
+    ma_alpha_idx,                                             \
+    ma_sve_chn_iterator,                                      \
+    ...)                                                      \
+    sdl_sve_stride_loop_rgb32(uStride, vTailPred)             \
+    {                                                         \
+        /* Reads pwSource, pwTarget, uStride by name. */      \
+        svuint16x4_t vSourceLow16x4 = svundef4_u16();         \
+        svuint16x4_t vSourceHigh16x4 = svundef4_u16();        \
+                                                              \
+        svuint16x4_t vTargetLow16x4 = svundef4_u16();         \
+        svuint16x4_t vTargetHigh16x4 = svundef4_u16();        \
+        /* Fill the source low/high tuples. */                \
+        svld4ub_u16(vTailPred,                                \
+                    (uint8_t *)pwSource,                      \
+                    &vSourceLow16x4,                          \
+                    &vSourceHigh16x4);                        \
+        /* Fill the target low/high tuples. */                \
+        svld4ub_u16(vTailPred,                                \
+                    (uint8_t *)pwTarget,                      \
+                    &vTargetLow16x4,                          \
+                    &vTargetHigh16x4);                        \
+        /* Set source element ma_alpha_idx to 0xFF. */        \
+        vSourceLow16x4 = svset4(vSourceLow16x4,               \
+                                (ma_alpha_idx),               \
+                                svdup_u16(0xFF));             \
+        vSourceHigh16x4 = svset4(vSourceHigh16x4,             \
+                                 (ma_alpha_idx),              \
+                                 svdup_u16(0xFF));            \
+                                                              \
+        /* process low half */                                \
+        ma_sve_chn_iterator(vSourceLow16x4, vTargetLow16x4,   \
+                            __VA_ARGS__);                     \
+                                                              \
+        /* process high half */                               \
+        ma_sve_chn_iterator(vSourceHigh16x4, vTargetHigh16x4, \
+                            __VA_ARGS__);                     \
+        /* Store both halves back to pwTarget. */             \
+        svst4ub_u16(vTailPred,                                \
+                    (uint8_t *)pwTarget,                      \
+                    vTargetLow16x4,                           \
+                    vTargetHigh16x4);                         \
+        /* Advance both cursors. */                           \
+        pwSource += sve_iteration_advance;                    \
+        pwTarget += sve_iteration_advance;                    \
+    }
+
+#define sdl_sve_rgb32_to_rgb565_stride_impl(ma_sve_chn_iterator, ...) \
+    sdl_sve_stride_loop_rgb32(uStride, vTailPred)                     \
+    {                                                                 \
+        /* Reads pwSource, phwTarget, uStride by name. */             \
+        svuint16x4_t vSourceLow16x4 = svundef4_u16();                 \
+        svuint16x4_t vSourceHigh16x4 = svundef4_u16();                \
+        /* Target side uses x3 tuples, not x4. */                     \
+        svuint16x3_t vTargetLow16x3 = svundef3_u16();                 \
+        svuint16x3_t vTargetHigh16x3 = svundef3_u16();                \
+        /* Fill the source low/high tuples. */                        \
+        svld4ub_u16(vTailPred,                                        \
+                    (uint8_t *)pwSource,                              \
+                    &vSourceLow16x4,                                  \
+                    &vSourceHigh16x4);                                \
+        /* Fill the target via svld3rgb565_u16. */                    \
+        svld3rgb565_u16(vTailPred,                                    \
+                        phwTarget,                                    \
+                        &vTargetLow16x3,                              \
+                        &vTargetHigh16x3);                            \
+        /* Low half, then high half, same iterator and arguments. */  \
+        ma_sve_chn_iterator(vSourceLow16x4, vTargetLow16x3,           \
+                            __VA_ARGS__);                             \
+                                                                      \
+        ma_sve_chn_iterator(vSourceHigh16x4, vTargetHigh16x3,         \
+                            __VA_ARGS__);                             \
+        /* Store both halves back to phwTarget. */                    \
+        svst3rgb565_u16(vTailPred,                                    \
+                        phwTarget,                                    \
+                        vTargetLow16x3,                               \
+                        vTargetHigh16x3);                             \
+        /* Advance both cursors. */                                   \
+        pwSource += sve_iteration_advance;                            \
+        phwTarget += sve_iteration_advance;                           \
+    }
+
+#define sdl_sve_rgb32_no_alpha_to_rgb565_stride_impl(         \
+    ma_alpha_idx,                                             \
+    ma_sve_chn_iterator,                                      \
+    ...)                                                      \
+    sdl_sve_stride_loop_rgb32(uStride, vTailPred)             \
+    {                                                         \
+        /* Reads pwSource, phwTarget, uStride by name. */     \
+        svuint16x4_t vSourceLow16x4 = svundef4_u16();         \
+        svuint16x4_t vSourceHigh16x4 = svundef4_u16();        \
+        /* Target side uses x3 tuples, not x4. */             \
+        svuint16x3_t vTargetLow16x3 = svundef3_u16();         \
+        svuint16x3_t vTargetHigh16x3 = svundef3_u16();        \
+        /* Fill the source low/high tuples. */                \
+        svld4ub_u16(vTailPred,                                \
+                    (uint8_t *)pwSource,                      \
+                    &vSourceLow16x4,                          \
+                    &vSourceHigh16x4);                        \
+        /* Set source element ma_alpha_idx to 0xFF. */        \
+        vSourceLow16x4 = svset4(vSourceLow16x4,               \
+                                (ma_alpha_idx),               \
+                                svdup_u16(0xFF));             \
+        vSourceHigh16x4 = svset4(vSourceHigh16x4,             \
+                                 (ma_alpha_idx),              \
+                                 svdup_u16(0xFF));            \
+        /* Fill the target via svld3rgb565_u16. */            \
+        svld3rgb565_u16(vTailPred,                            \
+                        phwTarget,                            \
+                        &vTargetLow16x3,                      \
+                        &vTargetHigh16x3);                    \
+        /* Low half, then high half, same iterator. */        \
+        ma_sve_chn_iterator(vSourceLow16x4, vTargetLow16x3,   \
+                            __VA_ARGS__);                     \
+                                                              \
+        ma_sve_chn_iterator(vSourceHigh16x4, vTargetHigh16x3, \
+                            __VA_ARGS__);                     \
+        /* Store both halves back to phwTarget. */            \
+        svst3rgb565_u16(vTailPred,                            \
+                        phwTarget,                            \
+                        vTargetLow16x3,                       \
+                        vTargetHigh16x3);                     \
+        /* Advance both cursors. */                           \
+        pwSource += sve_iteration_advance;                    \
+        phwTarget += sve_iteration_advance;                   \
+    }
+
+#ifndef sdl_sve_rgb32_blend_op_fill_alpha
+#define sdl_sve_rgb32_blend_op_fill_alpha(ma_alpha_chn_idx)
+#endif /* empty fallback: expands to nothing unless defined before this point */
+
+#ifndef sdl_sve_rgb32_blend_op_copy_alpha
+#define sdl_sve_rgb32_blend_op_copy_alpha(ma_alpha_chn_idx)
+#endif /* empty fallback: expands to nothing unless defined before this point */
+
+/*
+ * Source: ACCC and CCCA
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_accc_stride_blend_to_accc_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+    /* Expands sdl_sve_rgb32_stride_impl, which reads pwSource, pwTarget and uStride by name; alpha op: fill, index 3. */
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn,
+
+                              sdl_sve_rgb32_blend_op_fill_alpha(3);
+
+    );
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_accc_stride_blend_to_accc_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+    /* Expands sdl_sve_rgb32_stride_impl, which reads pwSource, pwTarget and uStride by name; alpha op: copy, index 3. */
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn,
+                              sdl_sve_rgb32_blend_op_copy_alpha(3););
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_ccca_stride_blend_to_ccca_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+    /* Expands sdl_sve_rgb32_stride_impl, which reads pwSource, pwTarget and uStride by name; alpha op: fill, index 0. */
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn,
+                              sdl_sve_rgb32_blend_op_fill_alpha(0););
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_ccca_stride_blend_to_ccca_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+    /* Expands sdl_sve_rgb32_stride_impl, which reads pwSource, pwTarget and uStride by name; alpha op: copy, index 0. */
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn,
+
+                              sdl_sve_rgb32_blend_op_copy_alpha(0);
+
+    );
+}
+
+/* Blend nHeight rows of nWidth 32-bit elements; both strides are byte
+ * distances between rows. A non-positive nWidth or nHeight blends nothing. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_accc_blend_to_accc_fill_alpha(
+    uint8_t *SDL_RESTRICT pchSource,
+    size_t uSourceStride,
+    uint8_t *SDL_RESTRICT pchTarget,
+    size_t uTargetStride,
+    int nWidth,
+    int nHeight)
+{
+    while (nWidth > 0 && nHeight-- > 0) {
+        sdl_sve_accc_stride_blend_to_accc_fill_alpha(
+            (uint32_t *)pchSource,
+            (uint32_t *)pchTarget,
+            (size_t)nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/* Blend nHeight rows of nWidth 32-bit elements; both strides are byte
+ * distances between rows. A non-positive nWidth or nHeight blends nothing. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_accc_blend_to_accc_copy_alpha(
+    uint8_t *SDL_RESTRICT pchSource,
+    size_t uSourceStride,
+    uint8_t *SDL_RESTRICT pchTarget,
+    size_t uTargetStride,
+    int nWidth,
+    int nHeight)
+{
+    while (nWidth > 0 && nHeight-- > 0) {
+        sdl_sve_accc_stride_blend_to_accc_copy_alpha(
+            (uint32_t *)pchSource,
+            (uint32_t *)pchTarget,
+            (size_t)nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/* Blend nHeight rows of nWidth 32-bit elements; both strides are byte
+ * distances between rows. A non-positive nWidth or nHeight blends nothing. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_ccca_blend_to_ccca_fill_alpha(
+    uint8_t *SDL_RESTRICT pchSource,
+    size_t uSourceStride,
+    uint8_t *SDL_RESTRICT pchTarget,
+    size_t uTargetStride,
+    int nWidth,
+    int nHeight)
+{
+    while (nWidth > 0 && nHeight-- > 0) {
+        sdl_sve_ccca_stride_blend_to_ccca_fill_alpha(
+            (uint32_t *)pchSource,
+            (uint32_t *)pchTarget,
+            (size_t)nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/* Blend nHeight rows of nWidth 32-bit elements; both strides are byte
+ * distances between rows. A non-positive nWidth or nHeight blends nothing. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_ccca_blend_to_ccca_copy_alpha(
+    uint8_t *SDL_RESTRICT pchSource,
+    size_t uSourceStride,
+    uint8_t *SDL_RESTRICT pchTarget,
+    size_t uTargetStride,
+    int nWidth,
+    int nHeight)
+{
+    while (nWidth > 0 && nHeight-- > 0) {
+        sdl_sve_ccca_stride_blend_to_ccca_copy_alpha(
+            (uint32_t *)pchSource,
+            (uint32_t *)pchTarget,
+            (size_t)nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_a123_stride_blend_to_321a_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+    /* Expands sdl_sve_rgb32_stride_impl (reads pwSource, pwTarget, uStride by name) with the src_dst_rev iterator; alpha op: fill, index 3. */
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_src_dst_rev,
+                              sdl_sve_rgb32_blend_op_fill_alpha(3);
+
+    );
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_a123_stride_blend_to_321a_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+    /* Expands sdl_sve_rgb32_stride_impl (reads pwSource, pwTarget, uStride by name) with the src_dst_rev iterator; alpha op: copy, index 3. */
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_src_dst_rev,
+                              sdl_sve_rgb32_blend_op_copy_alpha(3);
+
+    );
+}
+
+/* Blend nHeight rows of nWidth 32-bit elements; both strides are byte
+ * distances between rows. A non-positive nWidth or nHeight blends nothing. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_a123_blend_to_321a_fill_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    while (nWidth > 0 && nHeight-- > 0) {
+        sdl_sve_a123_stride_blend_to_321a_fill_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/* Blend nHeight rows of nWidth 32-bit elements; both strides are byte
+ * distances between rows. A non-positive nWidth or nHeight blends nothing. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_a123_blend_to_321a_copy_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    while (nWidth > 0 && nHeight-- > 0) {
+        sdl_sve_a123_stride_blend_to_321a_copy_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_123a_stride_blend_to_a321_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+    /* Expands sdl_sve_rgb32_stride_impl (reads pwSource, pwTarget, uStride by name) with the src_dst_rev iterator; alpha op: fill, index 0. */
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_src_dst_rev,
+                              sdl_sve_rgb32_blend_op_fill_alpha(0);
+
+    );
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_123a_stride_blend_to_a321_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+    /* Expands sdl_sve_rgb32_stride_impl (reads pwSource, pwTarget, uStride by name) with the src_dst_rev iterator; alpha op: copy, index 0. */
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_src_dst_rev,
+                              sdl_sve_rgb32_blend_op_copy_alpha(0);
+
+    );
+}
+
+/* Blend nHeight rows of nWidth 32-bit elements; both strides are byte
+ * distances between rows. A non-positive nWidth or nHeight blends nothing. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_123a_blend_to_a321_fill_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    while (nWidth > 0 && nHeight-- > 0) {
+        sdl_sve_123a_stride_blend_to_a321_fill_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/* Blend nHeight rows of nWidth 32-bit elements; both strides are byte
+ * distances between rows. A non-positive nWidth or nHeight blends nothing. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_123a_blend_to_a321_copy_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    while (nWidth > 0 && nHeight-- > 0) {
+        sdl_sve_123a_stride_blend_to_a321_copy_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_accc_stride_blend_to_ccca_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+    /* Expands sdl_sve_rgb32_stride_impl (reads pwSource, pwTarget, uStride by name) with the accc_ccca iterator; alpha op: fill, index 3. */
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_accc_ccca,
+                              sdl_sve_rgb32_blend_op_fill_alpha(3);
+
+    );
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_accc_stride_blend_to_ccca_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+    /* Expands sdl_sve_rgb32_stride_impl (reads pwSource, pwTarget, uStride by name) with the accc_ccca iterator; alpha op: copy, index 3. */
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_accc_ccca,
+                              sdl_sve_rgb32_blend_op_copy_alpha(3);
+
+    );
+}
+
+/* Blend nHeight rows of nWidth 32-bit elements; both strides are byte
+ * distances between rows. A non-positive nWidth or nHeight blends nothing. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_accc_blend_to_ccca_fill_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    while (nWidth > 0 && nHeight-- > 0) {
+        sdl_sve_accc_stride_blend_to_ccca_fill_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/* Blend nHeight rows of nWidth 32-bit elements; both strides are byte
+ * distances between rows. A non-positive nWidth or nHeight blends nothing. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_accc_blend_to_ccca_copy_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    while (nWidth > 0 && nHeight-- > 0) {
+        sdl_sve_accc_stride_blend_to_ccca_copy_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_ccca_stride_blend_to_accc_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+    /* Expands sdl_sve_rgb32_stride_impl (reads pwSource, pwTarget, uStride by name) with the ccca_accc iterator; alpha op: fill, index 0. */
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_ccca_accc,
+                              sdl_sve_rgb32_blend_op_fill_alpha(0);
+
+    );
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_ccca_stride_blend_to_accc_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+    /* Expands sdl_sve_rgb32_stride_impl (reads pwSource, pwTarget, uStride by name) with the ccca_accc iterator; alpha op: copy, index 0. */
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_ccca_accc,
+                              sdl_sve_rgb32_blend_op_copy_alpha(0););
+}
+
+/* Blend nHeight rows of nWidth 32-bit elements; both strides are byte
+ * distances between rows. A non-positive nWidth or nHeight blends nothing. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_ccca_blend_to_accc_fill_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    while (nWidth > 0 && nHeight-- > 0) {
+        sdl_sve_ccca_stride_blend_to_accc_fill_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/* Blend nHeight rows of nWidth 32-bit elements; both strides are byte
+ * distances between rows. A non-positive nWidth or nHeight blends nothing. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_ccca_blend_to_accc_copy_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    while (nWidth > 0 && nHeight-- > 0) {
+        sdl_sve_ccca_stride_blend_to_accc_copy_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_stride_impl with the
+ * sdl_sve_pixel_u16x4_foreach_chn_a123_a321 iterator and the
+ * sdl_sve_rgb32_blend_op_fill_alpha(3) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_a123_blend_to_a321_fill_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests an A123 source row converted to an A321
+ * target row (colour channels reversed) with alpha filled in; the meaning
+ * of the argument 3 is not visible here -- verify in the macro.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_a123_stride_blend_to_a321_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_a123_a321,
+                              sdl_sve_rgb32_blend_op_fill_alpha(3););
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_stride_impl with the
+ * sdl_sve_pixel_u16x4_foreach_chn_a123_a321 iterator and the
+ * sdl_sve_rgb32_blend_op_copy_alpha(3) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_a123_blend_to_a321_copy_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests an A123 source row converted to an A321
+ * target row (colour channels reversed) with alpha copied; the meaning of
+ * the argument 3 is not visible here -- verify in the macro.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_a123_stride_blend_to_a321_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_a123_a321,
+                              sdl_sve_rgb32_blend_op_copy_alpha(3););
+}
+
+/*
+ * Runs sdl_sve_a123_stride_blend_to_a321_fill_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ * The swizzle dispatcher in this file calls this for an ARGB8888 source
+ * with an ABGR8888/XBGR8888 target whose format has no alpha mask.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_a123_blend_to_a321_fill_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_a123_stride_blend_to_a321_fill_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Runs sdl_sve_a123_stride_blend_to_a321_copy_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ * The swizzle dispatcher in this file calls this for an ARGB8888 source
+ * with an ABGR8888/XBGR8888 target whose format has an alpha mask.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_a123_blend_to_a321_copy_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_a123_stride_blend_to_a321_copy_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_stride_impl with the
+ * sdl_sve_pixel_u16x4_foreach_chn_123a_321a iterator and the
+ * sdl_sve_rgb32_blend_op_fill_alpha(0) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_123a_blend_to_321a_fill_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests a 123A source row converted to a 321A
+ * target row (colour channels reversed) with alpha filled in; the meaning
+ * of the argument 0 is not visible here -- verify in the macro.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_123a_stride_blend_to_321a_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_123a_321a,
+                              sdl_sve_rgb32_blend_op_fill_alpha(0););
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_stride_impl with the
+ * sdl_sve_pixel_u16x4_foreach_chn_123a_321a iterator and the
+ * sdl_sve_rgb32_blend_op_copy_alpha(0) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_123a_blend_to_321a_copy_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests a 123A source row converted to a 321A
+ * target row (colour channels reversed) with alpha copied; the meaning of
+ * the argument 0 is not visible here -- verify in the macro.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_123a_stride_blend_to_321a_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+    sdl_sve_rgb32_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_123a_321a,
+                              sdl_sve_rgb32_blend_op_copy_alpha(0););
+}
+
+/*
+ * Runs sdl_sve_123a_stride_blend_to_321a_fill_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_123a_blend_to_321a_fill_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_123a_stride_blend_to_321a_fill_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Runs sdl_sve_123a_stride_blend_to_321a_copy_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_123a_blend_to_321a_copy_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_123a_stride_blend_to_321a_copy_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Source: XCCC and CCCX
+ */
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 3, the sdl_sve_pixel_u16x4_foreach_chn iterator and
+ * the sdl_sve_rgb32_blend_op_fill_alpha(3) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_xccc_blend_to_accc_fill_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests an XCCC source (no usable alpha) going
+ * to an ACCC target with alpha filled in; what the two 3s select is not
+ * visible here -- verify in the macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_xccc_stride_blend_to_accc_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(3,
+                                       sdl_sve_pixel_u16x4_foreach_chn,
+                                       sdl_sve_rgb32_blend_op_fill_alpha(3););
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 3, the sdl_sve_pixel_u16x4_foreach_chn iterator and
+ * the sdl_sve_rgb32_blend_op_copy_alpha(3) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_xccc_blend_to_accc_copy_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests an XCCC source (no usable alpha) going
+ * to an ACCC target via the copy-alpha operation; what the two 3s select
+ * is not visible here -- verify in the macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_xccc_stride_blend_to_accc_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(3,
+                                       sdl_sve_pixel_u16x4_foreach_chn,
+                                       sdl_sve_rgb32_blend_op_copy_alpha(3););
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 0, the sdl_sve_pixel_u16x4_foreach_chn iterator and
+ * the sdl_sve_rgb32_blend_op_fill_alpha(0) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_cccx_blend_to_ccca_fill_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests a CCCX source (no usable alpha) going
+ * to a CCCA target with alpha filled in; what the two 0s select is not
+ * visible here -- verify in the macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_cccx_stride_blend_to_ccca_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(0,
+                                       sdl_sve_pixel_u16x4_foreach_chn,
+                                       sdl_sve_rgb32_blend_op_fill_alpha(0);
+
+    );
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 0, the sdl_sve_pixel_u16x4_foreach_chn iterator and
+ * the sdl_sve_rgb32_blend_op_copy_alpha(0) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_cccx_blend_to_ccca_copy_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests a CCCX source (no usable alpha) going
+ * to a CCCA target via the copy-alpha operation; what the two 0s select
+ * is not visible here -- verify in the macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_cccx_stride_blend_to_ccca_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(0,
+                                       sdl_sve_pixel_u16x4_foreach_chn,
+                                       sdl_sve_rgb32_blend_op_copy_alpha(0););
+}
+
+/*
+ * Runs sdl_sve_xccc_stride_blend_to_accc_fill_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ * The swizzle dispatcher in this file calls this for an XRGB8888 source
+ * with an ARGB8888/XRGB8888 target whose format has no alpha mask.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_xccc_blend_to_accc_fill_alpha(
+    uint8_t *SDL_RESTRICT pchSource,
+    size_t uSourceStride,
+    uint8_t *SDL_RESTRICT pchTarget,
+    size_t uTargetStride,
+    int nWidth,
+    int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_xccc_stride_blend_to_accc_fill_alpha(
+            (uint32_t *)pchSource,
+            (uint32_t *)pchTarget,
+            (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Runs sdl_sve_xccc_stride_blend_to_accc_copy_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ * The swizzle dispatcher in this file calls this for an XRGB8888 source
+ * with an ARGB8888/XRGB8888 target whose format has an alpha mask.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_xccc_blend_to_accc_copy_alpha(
+    uint8_t *SDL_RESTRICT pchSource,
+    size_t uSourceStride,
+    uint8_t *SDL_RESTRICT pchTarget,
+    size_t uTargetStride,
+    int nWidth,
+    int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_xccc_stride_blend_to_accc_copy_alpha(
+            (uint32_t *)pchSource,
+            (uint32_t *)pchTarget,
+            (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Runs sdl_sve_cccx_stride_blend_to_ccca_fill_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ * The swizzle dispatcher in this file calls this for an RGBX8888 source
+ * with an RGBA8888/RGBX8888 target whose format has no alpha mask.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_cccx_blend_to_ccca_fill_alpha(
+    uint8_t *SDL_RESTRICT pchSource,
+    size_t uSourceStride,
+    uint8_t *SDL_RESTRICT pchTarget,
+    size_t uTargetStride,
+    int nWidth,
+    int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_cccx_stride_blend_to_ccca_fill_alpha(
+            (uint32_t *)pchSource,
+            (uint32_t *)pchTarget,
+            (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Runs sdl_sve_cccx_stride_blend_to_ccca_copy_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_cccx_blend_to_ccca_copy_alpha(
+    uint8_t *SDL_RESTRICT pchSource,
+    size_t uSourceStride,
+    uint8_t *SDL_RESTRICT pchTarget,
+    size_t uTargetStride,
+    int nWidth,
+    int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_cccx_stride_blend_to_ccca_copy_alpha(
+            (uint32_t *)pchSource,
+            (uint32_t *)pchTarget,
+            (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 3, the sdl_sve_pixel_u16x4_foreach_chn_src_dst_rev
+ * iterator and the sdl_sve_rgb32_blend_op_fill_alpha(3) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_x123_blend_to_321a_fill_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests an X123 source (no usable alpha) going
+ * to a 321A target (channel order reversed) with alpha filled in; what
+ * the two 3s select is not visible here -- verify in the macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_x123_stride_blend_to_321a_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(3,
+                                       sdl_sve_pixel_u16x4_foreach_chn_src_dst_rev,
+                                       sdl_sve_rgb32_blend_op_fill_alpha(3););
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 3, the sdl_sve_pixel_u16x4_foreach_chn_src_dst_rev
+ * iterator and the sdl_sve_rgb32_blend_op_copy_alpha(3) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_x123_blend_to_321a_copy_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests an X123 source (no usable alpha) going
+ * to a 321A target (channel order reversed) via the copy-alpha operation;
+ * what the two 3s select is not visible here -- verify in the macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_x123_stride_blend_to_321a_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(3,
+                                       sdl_sve_pixel_u16x4_foreach_chn_src_dst_rev,
+                                       sdl_sve_rgb32_blend_op_copy_alpha(3););
+}
+
+/*
+ * Runs sdl_sve_x123_stride_blend_to_321a_fill_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ * The swizzle dispatcher in this file calls this for an XRGB8888 source
+ * with a BGRA8888/BGRX8888 target whose format has no alpha mask.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_x123_blend_to_321a_fill_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_x123_stride_blend_to_321a_fill_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Runs sdl_sve_x123_stride_blend_to_321a_copy_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ * The swizzle dispatcher in this file calls this for an XRGB8888 source
+ * with a BGRA8888/BGRX8888 target whose format has an alpha mask.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_x123_blend_to_321a_copy_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_x123_stride_blend_to_321a_copy_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 0, the sdl_sve_pixel_u16x4_foreach_chn_src_dst_rev
+ * iterator and the sdl_sve_rgb32_blend_op_fill_alpha(0) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_123x_blend_to_a321_fill_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests a 123X source (no usable alpha) going
+ * to an A321 target (channel order reversed) with alpha filled in; what
+ * the two 0s select is not visible here -- verify in the macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_123x_stride_blend_to_a321_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(0,
+                                       sdl_sve_pixel_u16x4_foreach_chn_src_dst_rev,
+                                       sdl_sve_rgb32_blend_op_fill_alpha(0););
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 0, the sdl_sve_pixel_u16x4_foreach_chn_src_dst_rev
+ * iterator and the sdl_sve_rgb32_blend_op_copy_alpha(0) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_123x_blend_to_a321_copy_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests a 123X source (no usable alpha) going
+ * to an A321 target (channel order reversed) via the copy-alpha
+ * operation; what the two 0s select is not visible here -- verify in the
+ * macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_123x_stride_blend_to_a321_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(0,
+                                       sdl_sve_pixel_u16x4_foreach_chn_src_dst_rev,
+                                       sdl_sve_rgb32_blend_op_copy_alpha(0););
+}
+
+/*
+ * Runs sdl_sve_123x_stride_blend_to_a321_fill_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_123x_blend_to_a321_fill_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_123x_stride_blend_to_a321_fill_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Runs sdl_sve_123x_stride_blend_to_a321_copy_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_123x_blend_to_a321_copy_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_123x_stride_blend_to_a321_copy_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 3, the sdl_sve_pixel_u16x4_foreach_chn_accc_ccca
+ * iterator and the sdl_sve_rgb32_blend_op_fill_alpha(3) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_xccc_blend_to_ccca_fill_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests an XCCC source (no usable alpha) going
+ * to a CCCA target with alpha filled in; what the two 3s select is not
+ * visible here -- verify in the macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_xccc_stride_blend_to_ccca_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(3,
+                                       sdl_sve_pixel_u16x4_foreach_chn_accc_ccca,
+                                       sdl_sve_rgb32_blend_op_fill_alpha(3););
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 3, the sdl_sve_pixel_u16x4_foreach_chn_accc_ccca
+ * iterator and the sdl_sve_rgb32_blend_op_copy_alpha(3) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_xccc_blend_to_ccca_copy_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests an XCCC source (no usable alpha) going
+ * to a CCCA target via the copy-alpha operation; what the two 3s select
+ * is not visible here -- verify in the macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_xccc_stride_blend_to_ccca_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(3,
+                                       sdl_sve_pixel_u16x4_foreach_chn_accc_ccca,
+                                       sdl_sve_rgb32_blend_op_copy_alpha(3););
+}
+
+/*
+ * Runs sdl_sve_xccc_stride_blend_to_ccca_fill_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ * The swizzle dispatcher in this file calls this for an XRGB8888 source
+ * with an RGBA8888/RGBX8888 target whose format has no alpha mask.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_xccc_blend_to_ccca_fill_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_xccc_stride_blend_to_ccca_fill_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Runs sdl_sve_xccc_stride_blend_to_ccca_copy_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ * The swizzle dispatcher in this file calls this for an XRGB8888 source
+ * with an RGBA8888/RGBX8888 target whose format has an alpha mask.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_xccc_blend_to_ccca_copy_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_xccc_stride_blend_to_ccca_copy_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 0, the sdl_sve_pixel_u16x4_foreach_chn_ccca_accc
+ * iterator and the sdl_sve_rgb32_blend_op_fill_alpha(0) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_cccx_blend_to_accc_fill_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests a CCCX source (no usable alpha) going
+ * to an ACCC target with alpha filled in; what the two 0s select is not
+ * visible here -- verify in the macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_cccx_stride_blend_to_accc_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(0,
+                                       sdl_sve_pixel_u16x4_foreach_chn_ccca_accc,
+                                       sdl_sve_rgb32_blend_op_fill_alpha(0););
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 0, the sdl_sve_pixel_u16x4_foreach_chn_ccca_accc
+ * iterator and the sdl_sve_rgb32_blend_op_copy_alpha(0) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_cccx_blend_to_accc_copy_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests a CCCX source (no usable alpha) going
+ * to an ACCC target via the copy-alpha operation; what the two 0s select
+ * is not visible here -- verify in the macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_cccx_stride_blend_to_accc_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(0,
+                                       sdl_sve_pixel_u16x4_foreach_chn_ccca_accc,
+                                       sdl_sve_rgb32_blend_op_copy_alpha(0););
+}
+
+/*
+ * Runs sdl_sve_cccx_stride_blend_to_accc_fill_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ * The swizzle dispatcher in this file calls this for an RGBX8888 source
+ * with an ARGB8888/XRGB8888 target whose format has no alpha mask.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_cccx_blend_to_accc_fill_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_cccx_stride_blend_to_accc_fill_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Runs sdl_sve_cccx_stride_blend_to_accc_copy_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ * The swizzle dispatcher in this file calls this for an RGBX8888 source
+ * with an ARGB8888/XRGB8888 target whose format has an alpha mask.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_cccx_blend_to_accc_copy_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_cccx_stride_blend_to_accc_copy_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 3, the sdl_sve_pixel_u16x4_foreach_chn_a123_a321
+ * iterator and the sdl_sve_rgb32_blend_op_fill_alpha(3) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_x123_blend_to_a321_fill_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests an X123 source (no usable alpha) going
+ * to an A321 target (colour channels reversed) with alpha filled in; what
+ * the two 3s select is not visible here -- verify in the macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_x123_stride_blend_to_a321_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(3,
+                                       sdl_sve_pixel_u16x4_foreach_chn_a123_a321,
+                                       sdl_sve_rgb32_blend_op_fill_alpha(3););
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 3, the sdl_sve_pixel_u16x4_foreach_chn_a123_a321
+ * iterator and the sdl_sve_rgb32_blend_op_copy_alpha(3) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_x123_blend_to_a321_copy_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests an X123 source (no usable alpha) going
+ * to an A321 target (colour channels reversed) via the copy-alpha
+ * operation; what the two 3s select is not visible here -- verify in the
+ * macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_x123_stride_blend_to_a321_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(3,
+                                       sdl_sve_pixel_u16x4_foreach_chn_a123_a321,
+                                       sdl_sve_rgb32_blend_op_copy_alpha(3););
+}
+
+/*
+ * Runs sdl_sve_x123_stride_blend_to_a321_fill_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ * The swizzle dispatcher in this file calls this for an XRGB8888 source
+ * with an ABGR8888/XBGR8888 target whose format has no alpha mask.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_x123_blend_to_a321_fill_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_x123_stride_blend_to_a321_fill_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Runs sdl_sve_x123_stride_blend_to_a321_copy_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ * The swizzle dispatcher in this file calls this for an XRGB8888 source
+ * with an ABGR8888/XBGR8888 target whose format has an alpha mask.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_x123_blend_to_a321_copy_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_x123_stride_blend_to_a321_copy_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 0, the sdl_sve_pixel_u16x4_foreach_chn_123a_321a
+ * iterator and the sdl_sve_rgb32_blend_op_fill_alpha(0) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_123x_blend_to_321a_fill_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests a 123X source (no usable alpha) going
+ * to a 321A target (colour channels reversed) with alpha filled in; what
+ * the two 0s select is not visible here -- verify in the macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_123x_stride_blend_to_321a_fill_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+
+    sdl_sve_rgb32_no_alpha_stride_impl(0,
+                                       sdl_sve_pixel_u16x4_foreach_chn_123a_321a,
+                                       sdl_sve_rgb32_blend_op_fill_alpha(0););
+}
+
+/*
+ * Row kernel: one expansion of sdl_sve_rgb32_no_alpha_stride_impl with
+ * leading argument 0, the sdl_sve_pixel_u16x4_foreach_chn_123a_321a
+ * iterator and the sdl_sve_rgb32_blend_op_copy_alpha(0) operation.
+ *
+ * pwSource, pwTarget and uStride are never named in the call; presumably
+ * the macro picks them up by identifier, so do not rename them
+ * (TODO confirm against the macro definition). The row wrapper
+ * sdl_sve_123x_blend_to_321a_copy_alpha passes its nWidth as uStride.
+ *
+ * NOTE(review): the name suggests a 123X source (no usable alpha) going
+ * to a 321A target (colour channels reversed) via the copy-alpha
+ * operation; what the two 0s select is not visible here -- verify in the
+ * macros.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_123x_stride_blend_to_321a_copy_alpha(
+    uint32_t *SDL_RESTRICT pwSource,
+    uint32_t *SDL_RESTRICT pwTarget,
+    size_t uStride)
+{
+    sdl_sve_rgb32_no_alpha_stride_impl(0,
+                                       sdl_sve_pixel_u16x4_foreach_chn_123a_321a,
+                                       sdl_sve_rgb32_blend_op_copy_alpha(0););
+}
+
+/*
+ * Runs sdl_sve_123x_stride_blend_to_321a_fill_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_123x_blend_to_321a_fill_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_123x_stride_blend_to_321a_fill_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * Runs sdl_sve_123x_stride_blend_to_321a_copy_alpha over nHeight rows of
+ * 32-bit pixels.
+ *
+ * pchSource/pchTarget point at the first row; after each row they advance
+ * by uSourceStride/uTargetStride bytes. nWidth is the per-row count of
+ * uint32_t pixels handed to the row kernel.
+ *
+ * Negative nWidth or nHeight returns immediately: a `while (nHeight--)`
+ * countdown started below zero only stops after signed overflow, and a
+ * negative width would become a huge size_t pixel count.
+ */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_123x_blend_to_321a_copy_alpha(uint8_t *SDL_RESTRICT pchSource,
+                                                         size_t uSourceStride,
+                                                         uint8_t *SDL_RESTRICT pchTarget,
+                                                         size_t uTargetStride,
+                                                         int nWidth,
+                                                         int nHeight)
+{
+    if ((nWidth < 0) || (nHeight < 0)) {
+        return;
+    }
+
+    for (int nRow = 0; nRow < nHeight; nRow++) {
+        sdl_sve_123x_stride_blend_to_321a_copy_alpha((uint32_t *)pchSource,
+                                                     (uint32_t *)pchTarget,
+                                                     (size_t)nWidth);
+
+        /* strides are byte distances, hence the uint8_t pointers */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1)
+static inline void sdl_sve_8888_to_8888_swizzle_dispatcher(SDL_BlitInfo *info)
+{
+    int width = info->dst_w;
+    int height = info->dst_h;
+    uint8_t *src = info->src;
+    int srcskip = info->src_skip;
+    uint8_t *dst = info->dst;
+    int dstskip = info->dst_skip;
+
+    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
+    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
+
+    // Set up some basic variables
+    int srcbpp = srcfmt->bytes_per_pixel;
+    int dstbpp = dstfmt->bytes_per_pixel;
+
+    assert((srcbpp == 4) && (dstbpp == 4));
+
+    bool fill_alpha = (!dstfmt->Amask);
+
+    int srcstride = srcskip + srcbpp * width;
+    int dststride = dstskip + dstbpp * width;
+
+    switch (srcfmt->format) {
+    case SDL_PIXELFORMAT_XRGB8888:
+        switch (dstfmt->format) {
+        case SDL_PIXELFORMAT_ARGB8888:
+        case SDL_PIXELFORMAT_XRGB8888:
+            if (fill_alpha) {
+                sdl_sve_xccc_blend_to_accc_fill_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            } else {
+                sdl_sve_xccc_blend_to_accc_copy_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_RGBA8888:
+        case SDL_PIXELFORMAT_RGBX8888:
+            if (fill_alpha) {
+                sdl_sve_xccc_blend_to_ccca_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_xccc_blend_to_ccca_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_ABGR8888:
+        case SDL_PIXELFORMAT_XBGR8888:
+            if (fill_alpha) {
+                sdl_sve_x123_blend_to_a321_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_x123_blend_to_a321_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_BGRA8888:
+        case SDL_PIXELFORMAT_BGRX8888:
+            if (fill_alpha) {
+                sdl_sve_x123_blend_to_321a_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_x123_blend_to_321a_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+        default:
+            assert(false);
+            break;
+        }
+        break;
+
+    case SDL_PIXELFORMAT_ARGB8888:
+        switch (dstfmt->format) {
+        case SDL_PIXELFORMAT_ARGB8888:
+        case SDL_PIXELFORMAT_XRGB8888:
+            if (fill_alpha) {
+                sdl_sve_accc_blend_to_accc_fill_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            } else {
+                sdl_sve_accc_blend_to_accc_copy_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_RGBA8888:
+        case SDL_PIXELFORMAT_RGBX8888:
+            if (fill_alpha) {
+                sdl_sve_accc_blend_to_ccca_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_accc_blend_to_ccca_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_ABGR8888:
+        case SDL_PIXELFORMAT_XBGR8888:
+            if (fill_alpha) {
+                sdl_sve_a123_blend_to_a321_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_a123_blend_to_a321_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_BGRA8888:
+        case SDL_PIXELFORMAT_BGRX8888:
+            if (fill_alpha) {
+                sdl_sve_a123_blend_to_321a_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_a123_blend_to_321a_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+        default:
+            assert(false);
+            break;
+        }
+        break;
+
+    case SDL_PIXELFORMAT_RGBX8888:
+        switch (dstfmt->format) {
+        case SDL_PIXELFORMAT_ARGB8888:
+        case SDL_PIXELFORMAT_XRGB8888:
+            if (fill_alpha) {
+                sdl_sve_cccx_blend_to_accc_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_cccx_blend_to_accc_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_RGBA8888:
+        case SDL_PIXELFORMAT_RGBX8888:
+            if (fill_alpha) {
+                sdl_sve_cccx_blend_to_ccca_fill_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            } else {
+                sdl_sve_cccx_blend_to_ccca_copy_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_ABGR8888:
+        case SDL_PIXELFORMAT_XBGR8888:
+            if (fill_alpha) {
+                sdl_sve_123x_blend_to_a321_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_123x_blend_to_a321_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_BGRA8888:
+        case SDL_PIXELFORMAT_BGRX8888:
+            if (fill_alpha) {
+                sdl_sve_123x_blend_to_321a_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_123x_blend_to_321a_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+        default:
+            assert(false);
+            break;
+        }
+        break;
+
+    case SDL_PIXELFORMAT_RGBA8888:
+        switch (dstfmt->format) {
+        case SDL_PIXELFORMAT_ARGB8888:
+        case SDL_PIXELFORMAT_XRGB8888:
+            if (fill_alpha) {
+                sdl_sve_ccca_blend_to_accc_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_ccca_blend_to_accc_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_RGBA8888:
+        case SDL_PIXELFORMAT_RGBX8888:
+            if (fill_alpha) {
+                sdl_sve_ccca_blend_to_ccca_fill_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            } else {
+                sdl_sve_ccca_blend_to_ccca_copy_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_ABGR8888:
+        case SDL_PIXELFORMAT_XBGR8888:
+            if (fill_alpha) {
+                sdl_sve_123a_blend_to_a321_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_123a_blend_to_a321_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_BGRA8888:
+        case SDL_PIXELFORMAT_BGRX8888:
+            if (fill_alpha) {
+                sdl_sve_123a_blend_to_321a_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_123a_blend_to_321a_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+        default:
+            assert(false);
+            break;
+        }
+        break;
+
+    case SDL_PIXELFORMAT_XBGR8888:
+        switch (dstfmt->format) {
+        case SDL_PIXELFORMAT_ARGB8888:
+        case SDL_PIXELFORMAT_XRGB8888:
+            if (fill_alpha) {
+                sdl_sve_x123_blend_to_a321_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_x123_blend_to_a321_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_RGBA8888:
+        case SDL_PIXELFORMAT_RGBX8888:
+            if (fill_alpha) {
+                sdl_sve_x123_blend_to_321a_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_x123_blend_to_321a_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_ABGR8888:
+        case SDL_PIXELFORMAT_XBGR8888:
+            if (fill_alpha) {
+                sdl_sve_xccc_blend_to_accc_fill_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            } else {
+                sdl_sve_xccc_blend_to_accc_copy_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_BGRA8888:
+        case SDL_PIXELFORMAT_BGRX8888:
+            if (fill_alpha) {
+                sdl_sve_xccc_blend_to_ccca_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_xccc_blend_to_ccca_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+        default:
+            assert(false);
+            break;
+        }
+        break;
+
+    case SDL_PIXELFORMAT_ABGR8888:
+        switch (dstfmt->format) {
+        case SDL_PIXELFORMAT_ARGB8888:
+        case SDL_PIXELFORMAT_XRGB8888:
+            if (fill_alpha) {
+                sdl_sve_a123_blend_to_a321_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_a123_blend_to_a321_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_RGBA8888:
+        case SDL_PIXELFORMAT_RGBX8888:
+            if (fill_alpha) {
+                sdl_sve_a123_blend_to_321a_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_a123_blend_to_321a_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_ABGR8888:
+        case SDL_PIXELFORMAT_XBGR8888:
+            if (fill_alpha) {
+                sdl_sve_accc_blend_to_accc_fill_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            } else {
+                sdl_sve_accc_blend_to_accc_copy_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_BGRA8888:
+        case SDL_PIXELFORMAT_BGRX8888:
+            if (fill_alpha) {
+                sdl_sve_accc_blend_to_ccca_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_accc_blend_to_ccca_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+        default:
+            assert(false);
+            break;
+        }
+        break;
+
+    case SDL_PIXELFORMAT_BGRX8888:
+        switch (dstfmt->format) {
+        case SDL_PIXELFORMAT_ARGB8888:
+        case SDL_PIXELFORMAT_XRGB8888:
+            if (fill_alpha) {
+                sdl_sve_123x_blend_to_a321_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_123x_blend_to_a321_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_RGBA8888:
+        case SDL_PIXELFORMAT_RGBX8888:
+            if (fill_alpha) {
+                sdl_sve_123x_blend_to_321a_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_123x_blend_to_321a_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_ABGR8888:
+        case SDL_PIXELFORMAT_XBGR8888:
+            if (fill_alpha) {
+                sdl_sve_cccx_blend_to_accc_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_cccx_blend_to_accc_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_BGRA8888:
+        case SDL_PIXELFORMAT_BGRX8888:
+            if (fill_alpha) {
+                sdl_sve_cccx_blend_to_ccca_fill_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            } else {
+                sdl_sve_cccx_blend_to_ccca_copy_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            }
+            break;
+        default:
+            assert(false);
+            break;
+        }
+        break;
+
+    case SDL_PIXELFORMAT_BGRA8888:
+        switch (dstfmt->format) {
+        case SDL_PIXELFORMAT_ARGB8888:
+        case SDL_PIXELFORMAT_XRGB8888:
+            if (fill_alpha) {
+                sdl_sve_123a_blend_to_a321_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_123a_blend_to_a321_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_RGBA8888:
+        case SDL_PIXELFORMAT_RGBX8888:
+            if (fill_alpha) {
+                sdl_sve_123a_blend_to_321a_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_123a_blend_to_321a_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_ABGR8888:
+        case SDL_PIXELFORMAT_XBGR8888:
+            if (fill_alpha) {
+                sdl_sve_ccca_blend_to_accc_fill_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            } else {
+                sdl_sve_ccca_blend_to_accc_copy_alpha(src,
+                                                      srcstride,
+                                                      dst,
+                                                      dststride,
+                                                      width,
+                                                      height);
+            }
+            break;
+
+        case SDL_PIXELFORMAT_BGRA8888:
+        case SDL_PIXELFORMAT_BGRX8888:
+            if (fill_alpha) {
+                sdl_sve_ccca_blend_to_ccca_fill_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            } else {
+                sdl_sve_ccca_blend_to_ccca_copy_alpha(
+                    src,
+                    srcstride,
+                    dst,
+                    dststride,
+                    width,
+                    height);
+            }
+            break;
+        default:
+            assert(false);
+            break;
+        }
+        break;
+
+    default:
+        assert(false);
+        break;
+    }
+}
+
+#ifndef sdl_sve_rgb32_blend_to_rgb565_op
+#define sdl_sve_rgb32_blend_to_rgb565_op(ma_alpha_chn_idx) /* fallback when not defined earlier: expands to nothing */
+#endif /* sdl_sve_rgb32_blend_to_rgb565_op */
+
+/*
+ * ACCC or CCCA
+ */
+/* One-row ARGB8888 -> RGB565 wrapper; the blend op is given alpha channel index 3.
+ * NOTE(review): the impl macro presumably uses pwSource/phwTarget/uStride by name. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_argb8888_stride_blend_to_rgb565(uint32_t *SDL_RESTRICT pwSource,
+                                                           uint16_t *SDL_RESTRICT phwTarget,
+                                                           size_t uStride)
+{
+    sdl_sve_rgb32_to_rgb565_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_argb_rgb565, {
+        sdl_sve_rgb32_blend_to_rgb565_op(3);
+    });
+}
+
+/* Calls sdl_sve_argb8888_stride_blend_to_rgb565 once per row until nHeight reaches 0.
+ * Both strides are byte offsets: they are added to the uint8_t row pointers after
+ * every row. nWidth is forwarded (converted to size_t) as the row routine's uStride.
+ * The counter is only tested against zero, so nHeight must not be negative. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_argb8888_blend_to_rgb565(uint8_t *SDL_RESTRICT pchSource,
+                                                    size_t uSourceStride,
+                                                    uint8_t *SDL_RESTRICT pchTarget,
+                                                    size_t uTargetStride,
+                                                    int nWidth,
+                                                    int nHeight)
+{
+    for (; nHeight != 0; --nHeight) {
+        sdl_sve_argb8888_stride_blend_to_rgb565((uint32_t *)pchSource, (uint16_t *)pchTarget, nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/* One-row RGBA8888 -> RGB565 wrapper; the blend op is given alpha channel index 0.
+ * NOTE(review): the impl macro presumably uses pwSource/phwTarget/uStride by name. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_rgba8888_stride_blend_to_rgb565(uint32_t *SDL_RESTRICT pwSource,
+                                                           uint16_t *SDL_RESTRICT phwTarget,
+                                                           size_t uStride)
+{
+    sdl_sve_rgb32_to_rgb565_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_rgba_rgb565, {
+        sdl_sve_rgb32_blend_to_rgb565_op(0);
+    });
+}
+
+/* Calls sdl_sve_rgba8888_stride_blend_to_rgb565 once per row until nHeight reaches 0.
+ * Both strides are byte offsets: they are added to the uint8_t row pointers after
+ * every row. nWidth is forwarded (converted to size_t) as the row routine's uStride.
+ * The counter is only tested against zero, so nHeight must not be negative. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_rgba8888_blend_to_rgb565(uint8_t *SDL_RESTRICT pchSource,
+                                                    size_t uSourceStride,
+                                                    uint8_t *SDL_RESTRICT pchTarget,
+                                                    size_t uTargetStride,
+                                                    int nWidth,
+                                                    int nHeight)
+{
+    for (; nHeight != 0; --nHeight) {
+        sdl_sve_rgba8888_stride_blend_to_rgb565((uint32_t *)pchSource, (uint16_t *)pchTarget, nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/* One-row BGRA8888 -> RGB565 wrapper; the blend op is given alpha channel index 0.
+ * NOTE(review): the impl macro presumably uses pwSource/phwTarget/uStride by name. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_bgra8888_stride_blend_to_rgb565(uint32_t *SDL_RESTRICT pwSource,
+                                                           uint16_t *SDL_RESTRICT phwTarget,
+                                                           size_t uStride)
+{
+    sdl_sve_rgb32_to_rgb565_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_bgra_rgb565, {
+        sdl_sve_rgb32_blend_to_rgb565_op(0);
+    });
+}
+
+/* Calls sdl_sve_bgra8888_stride_blend_to_rgb565 once per row until nHeight reaches 0.
+ * Both strides are byte offsets: they are added to the uint8_t row pointers after
+ * every row. nWidth is forwarded (converted to size_t) as the row routine's uStride.
+ * The counter is only tested against zero, so nHeight must not be negative. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_bgra8888_blend_to_rgb565(uint8_t *SDL_RESTRICT pchSource,
+                                                    size_t uSourceStride,
+                                                    uint8_t *SDL_RESTRICT pchTarget,
+                                                    size_t uTargetStride,
+                                                    int nWidth,
+                                                    int nHeight)
+{
+    for (; nHeight != 0; --nHeight) {
+        sdl_sve_bgra8888_stride_blend_to_rgb565((uint32_t *)pchSource, (uint16_t *)pchTarget, nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/* One-row ABGR8888 -> RGB565 wrapper; the blend op is given alpha channel index 3.
+ * NOTE(review): the impl macro presumably uses pwSource/phwTarget/uStride by name. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_abgr8888_stride_blend_to_rgb565(uint32_t *SDL_RESTRICT pwSource,
+                                                           uint16_t *SDL_RESTRICT phwTarget,
+                                                           size_t uStride)
+{
+    sdl_sve_rgb32_to_rgb565_stride_impl(sdl_sve_pixel_u16x4_foreach_chn_abgr_rgb565, {
+        sdl_sve_rgb32_blend_to_rgb565_op(3);
+    });
+}
+
+/* Calls sdl_sve_abgr8888_stride_blend_to_rgb565 once per row until nHeight reaches 0.
+ * Both strides are byte offsets: they are added to the uint8_t row pointers after
+ * every row. nWidth is forwarded (converted to size_t) as the row routine's uStride.
+ * The counter is only tested against zero, so nHeight must not be negative. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_abgr8888_blend_to_rgb565(uint8_t *SDL_RESTRICT pchSource,
+                                                    size_t uSourceStride,
+                                                    uint8_t *SDL_RESTRICT pchTarget,
+                                                    size_t uTargetStride,
+                                                    int nWidth,
+                                                    int nHeight)
+{
+    for (; nHeight != 0; --nHeight) {
+        sdl_sve_abgr8888_stride_blend_to_rgb565((uint32_t *)pchSource, (uint16_t *)pchTarget, nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/*
+ * XCCC or CCCX
+ */
+/* One-row XRGB8888 -> RGB565 wrapper built on the no-alpha impl macro; index 3 is
+ * handed to both the impl macro and the blend op (presumably the X byte - confirm).
+ * NOTE(review): the impl macro presumably uses pwSource/phwTarget/uStride by name. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_xrgb8888_stride_blend_to_rgb565(uint32_t *SDL_RESTRICT pwSource,
+                                                           uint16_t *SDL_RESTRICT phwTarget,
+                                                           size_t uStride)
+{
+    sdl_sve_rgb32_no_alpha_to_rgb565_stride_impl(3, sdl_sve_pixel_u16x4_foreach_chn_argb_rgb565, {
+        sdl_sve_rgb32_blend_to_rgb565_op(3);
+    });
+}
+
+/* Calls sdl_sve_xrgb8888_stride_blend_to_rgb565 once per row until nHeight reaches 0.
+ * Both strides are byte offsets: they are added to the uint8_t row pointers after
+ * every row. nWidth is forwarded (converted to size_t) as the row routine's uStride.
+ * The counter is only tested against zero, so nHeight must not be negative. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_xrgb8888_blend_to_rgb565(uint8_t *SDL_RESTRICT pchSource,
+                                                    size_t uSourceStride,
+                                                    uint8_t *SDL_RESTRICT pchTarget,
+                                                    size_t uTargetStride,
+                                                    int nWidth,
+                                                    int nHeight)
+{
+    for (; nHeight != 0; --nHeight) {
+        sdl_sve_xrgb8888_stride_blend_to_rgb565((uint32_t *)pchSource, (uint16_t *)pchTarget, nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/* One-row RGBX8888 -> RGB565 wrapper built on the no-alpha impl macro; index 0 is
+ * handed to both the impl macro and the blend op (presumably the X byte - confirm).
+ * NOTE(review): the impl macro presumably uses pwSource/phwTarget/uStride by name. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_rgbx8888_stride_blend_to_rgb565(uint32_t *SDL_RESTRICT pwSource,
+                                                           uint16_t *SDL_RESTRICT phwTarget,
+                                                           size_t uStride)
+{
+    sdl_sve_rgb32_no_alpha_to_rgb565_stride_impl(0, sdl_sve_pixel_u16x4_foreach_chn_rgba_rgb565, {
+        sdl_sve_rgb32_blend_to_rgb565_op(0);
+    });
+}
+
+/* Calls sdl_sve_rgbx8888_stride_blend_to_rgb565 once per row until nHeight reaches 0.
+ * Both strides are byte offsets: they are added to the uint8_t row pointers after
+ * every row. nWidth is forwarded (converted to size_t) as the row routine's uStride.
+ * The counter is only tested against zero, so nHeight must not be negative. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_rgbx8888_blend_to_rgb565(uint8_t *SDL_RESTRICT pchSource,
+                                                    size_t uSourceStride,
+                                                    uint8_t *SDL_RESTRICT pchTarget,
+                                                    size_t uTargetStride,
+                                                    int nWidth,
+                                                    int nHeight)
+{
+    for (; nHeight != 0; --nHeight) {
+        sdl_sve_rgbx8888_stride_blend_to_rgb565((uint32_t *)pchSource, (uint16_t *)pchTarget, nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/* One-row BGRX8888 -> RGB565 wrapper built on the no-alpha impl macro; index 0 is
+ * handed to both the impl macro and the blend op (presumably the X byte - confirm).
+ * NOTE(review): the impl macro presumably uses pwSource/phwTarget/uStride by name. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_bgrx8888_stride_blend_to_rgb565(uint32_t *SDL_RESTRICT pwSource,
+                                                           uint16_t *SDL_RESTRICT phwTarget,
+                                                           size_t uStride)
+{
+    sdl_sve_rgb32_no_alpha_to_rgb565_stride_impl(0, sdl_sve_pixel_u16x4_foreach_chn_bgra_rgb565, {
+        sdl_sve_rgb32_blend_to_rgb565_op(0);
+    });
+}
+
+/* Calls sdl_sve_bgrx8888_stride_blend_to_rgb565 once per row until nHeight reaches 0.
+ * Both strides are byte offsets: they are added to the uint8_t row pointers after
+ * every row. nWidth is forwarded (converted to size_t) as the row routine's uStride.
+ * The counter is only tested against zero, so nHeight must not be negative. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_bgrx8888_blend_to_rgb565(uint8_t *SDL_RESTRICT pchSource,
+                                                    size_t uSourceStride,
+                                                    uint8_t *SDL_RESTRICT pchTarget,
+                                                    size_t uTargetStride,
+                                                    int nWidth,
+                                                    int nHeight)
+{
+    for (; nHeight != 0; --nHeight) {
+        sdl_sve_bgrx8888_stride_blend_to_rgb565((uint32_t *)pchSource, (uint16_t *)pchTarget, nWidth);
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+/* One-row XBGR8888 -> RGB565 wrapper built on the no-alpha impl macro; index 3 is
+ * handed to both the impl macro and the blend op (presumably the X byte - confirm).
+ * NOTE(review): the impl macro presumably uses pwSource/phwTarget/uStride by name. */
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_xbgr8888_stride_blend_to_rgb565(uint32_t *SDL_RESTRICT pwSource,
+                                                           uint16_t *SDL_RESTRICT phwTarget,
+                                                           size_t uStride)
+{
+    sdl_sve_rgb32_no_alpha_to_rgb565_stride_impl(3, sdl_sve_pixel_u16x4_foreach_chn_abgr_rgb565, {
+        sdl_sve_rgb32_blend_to_rgb565_op(3);
+    });
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_xbgr8888_blend_to_rgb565(uint8_t *SDL_RESTRICT pchSource,
+                                                    size_t uSourceStride,
+                                                    uint8_t *SDL_RESTRICT pchTarget,
+                                                    size_t uTargetStride,
+                                                    int nWidth,
+                                                    int nHeight)
+{ /* Runs the XBGR8888 -> RGB565 row routine on nHeight consecutive rows. */
+    while (nHeight-- > 0) { /* "> 0": a negative height ends at once instead of counting down until the int wraps */
+        /* One row per pass; nWidth is handed to the row routine as its length argument. */
+        sdl_sve_xbgr8888_stride_blend_to_rgb565((uint32_t *)pchSource,
+                                                (uint16_t *)pchTarget,
+                                                nWidth);
+        /* Both strides are byte distances between row starts, hence the uint8_t row pointers. */
+        pchSource += uSourceStride;
+        pchTarget += uTargetStride;
+    }
+}
+
+SDL_TARGETING("arch=armv8-a+sve2")
+ARM_NONNULL(1)
+static inline void sdl_sve_rgb32_to_rgb565_swizzle_dispatcher(SDL_BlitInfo *info)
+{ // Selects the 32-bit -> RGB565 blend routine matching info->src_fmt->format and runs it over the blit area.
+    int width = info->dst_w;
+    int height = info->dst_h;
+    uint8_t *src = info->src;
+    int srcskip = info->src_skip;
+    uint8_t *dst = info->dst;
+    int dstskip = info->dst_skip;
+
+    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
+    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
+
+    // Set up some basic variables
+    int srcbpp = srcfmt->bytes_per_pixel;
+    int dstbpp = dstfmt->bytes_per_pixel;
+    // Only 4-byte source pixels and 2-byte destination pixels are expected here (checked in debug builds only).
+    assert(srcbpp == 4);
+    assert(dstbpp == 2);
+    // Row stride in bytes: the packed pixel bytes (bpp * width) plus the per-row skip.
+    int srcstride = srcskip + srcbpp * width;
+    int dststride = dstskip + dstbpp * width;
+    // Every case forwards the same six arguments; only the routine for the source layout differs.
+    switch (srcfmt->format) {
+    case SDL_PIXELFORMAT_XRGB8888:
+        sdl_sve_xrgb8888_blend_to_rgb565(src,
+                                         srcstride,
+                                         dst,
+                                         dststride,
+                                         width,
+                                         height);
+        break;
+
+    case SDL_PIXELFORMAT_ARGB8888:
+        sdl_sve_argb8888_blend_to_rgb565(src,
+                                         srcstride,
+                                         dst,
+                                         dststride,
+                                         width,
+                                         height);
+        break;
+
+    case SDL_PIXELFORMAT_RGBX8888:
+        sdl_sve_rgbx8888_blend_to_rgb565(src,
+                                         srcstride,
+                                         dst,
+                                         dststride,
+                                         width,
+                                         height);
+        break;
+
+    case SDL_PIXELFORMAT_RGBA8888:
+        sdl_sve_rgba8888_blend_to_rgb565(src,
+                                         srcstride,
+                                         dst,
+                                         dststride,
+                                         width,
+                                         height);
+        break;
+
+    case SDL_PIXELFORMAT_XBGR8888:
+        sdl_sve_xbgr8888_blend_to_rgb565(src,
+                                         srcstride,
+                                         dst,
+                                         dststride,
+                                         width,
+                                         height);
+        break;
+
+    case SDL_PIXELFORMAT_ABGR8888:
+        sdl_sve_abgr8888_blend_to_rgb565(src,
+                                         srcstride,
+                                         dst,
+                                         dststride,
+                                         width,
+                                         height);
+        break;
+
+    case SDL_PIXELFORMAT_BGRX8888:
+        sdl_sve_bgrx8888_blend_to_rgb565(src,
+                                         srcstride,
+                                         dst,
+                                         dststride,
+                                         width,
+                                         height);
+        break;
+
+    case SDL_PIXELFORMAT_BGRA8888:
+        sdl_sve_bgra8888_blend_to_rgb565(src,
+                                         srcstride,
+                                         dst,
+                                         dststride,
+                                         width,
+                                         height);
+        break;
+
+    default:
+        assert(false); // Unhandled source format: traps in debug builds; with NDEBUG no routine is called.
+        break;
+    }
+}
+
+#endif /* SD_SVE2_SWIZZLE_H */
\ No newline at end of file
diff --git a/src/video/arm/SDL_sve2_util.h b/src/video/arm/SDL_sve2_util.h
new file mode 100644
index 0000000000..2a1602b432
--- /dev/null
+++ b/src/video/arm/SDL_sve2_util.h
@@ -0,0 +1,206 @@
+/*
+  Simple DirectMedia Layer
+  Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef SDL_SVE2_UTIL_H
+#define SDL_SVE2_UTIL_H
+
+#undef SVE_0_CONNECT2 /* This run of #undefs discards any earlier definitions so the ones below are the ones used. */
+#undef SVE_0_CONNECT3
+#undef SVE_0_CONNECT4
+#undef SVE_0_CONNECT5
+#undef SVE_0_CONNECT6
+#undef SVE_0_CONNECT7
+#undef SVE_0_CONNECT8
+#undef SVE_0_CONNECT9
+
+#undef SVE_CONNECT2
+#undef SVE_CONNECT3
+#undef SVE_CONNECT4
+#undef SVE_CONNECT5
+#undef SVE_CONNECT6
+#undef SVE_CONNECT7
+#undef SVE_CONNECT8
+#undef SVE_CONNECT9
+#undef ALT_SVE_CONNECT2
+
+#undef SVE_SAFE_NAME
+
+#undef SVE_CONNECT
+
+#define SVE_0_CONNECT2(ma_A, ma_B)             ma_A##ma_B
+#define SVE_0_CONNECT3(ma_A, ma_B, ma_C)       ma_A##ma_B##ma_C
+#define SVE_0_CONNECT4(ma_A, ma_B, ma_C, ma_D) ma_A##ma_B##ma_C##ma_D
+#define SVE_0_CONNECT5(ma_A, ma_B, ma_C, ma_D, ma_E) \
+    ma_A##ma_B##ma_C##ma_D##ma_E
+#define SVE_0_CONNECT6(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F) \
+    ma_A##ma_B##ma_C##ma_D##ma_E##ma_F
+#define SVE_0_CONNECT7(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G) \
+    ma_A##ma_B##ma_C##ma_D##ma_E##ma_F##ma_G
+#define SVE_0_CONNECT8(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G, ma_H) \
+    ma_A##ma_B##ma_C##ma_D##ma_E##ma_F##ma_G##ma_H
+#define SVE_0_CONNECT9(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G, ma_H, ma_I) \
+    ma_A##ma_B##ma_C##ma_D##ma_E##ma_F##ma_G##ma_H##ma_I
+/* SVE_0_CONNECTn (above) paste their arguments as written; SVE_CONNECTn (below) add one level of indirection so macro arguments are expanded before pasting. */
+#define ALT_SVE_CONNECT2(ma_A, ma_B)   SVE_0_CONNECT2(ma_A, ma_B)
+#define SVE_CONNECT2(ma_A, ma_B)       SVE_0_CONNECT2(ma_A, ma_B)
+#define SVE_CONNECT3(ma_A, ma_B, ma_C) SVE_0_CONNECT3(ma_A, ma_B, ma_C)
+#define SVE_CONNECT4(ma_A, ma_B, ma_C, ma_D) \
+    SVE_0_CONNECT4(ma_A, ma_B, ma_C, ma_D)
+#define SVE_CONNECT5(ma_A, ma_B, ma_C, ma_D, ma_E) \
+    SVE_0_CONNECT5(ma_A, ma_B, ma_C, ma_D, ma_E)
+#define SVE_CONNECT6(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F) \
+    SVE_0_CONNECT6(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F)
+#define SVE_CONNECT7(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G) \
+    SVE_0_CONNECT7(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G)
+#define SVE_CONNECT8(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G, ma_H) \
+    SVE_0_CONNECT8(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G, ma_H)
+#define SVE_CONNECT9(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G, ma_H, ma_I) \
+    SVE_0_CONNECT9(ma_A, ma_B, ma_C, ma_D, ma_E, ma_F, ma_G, ma_H, ma_I)
+/* SVE_CONNECT(...) counts its arguments with SVE_VA_NUM_ARGS and forwards them to the SVE_CONNECTn of that arity. */
+#define SVE_CONNECT(...)          \
+    ALT_SVE_CONNECT2(SVE_CONNECT, \
+                     SVE_VA_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
+
+#ifndef SVE_VA_NUM_ARGS_IMPL
+#define SVE_VA_NUM_ARGS_IMPL(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \
+                             _12, _13, _14, _15, _16, ma_N, ...) ma_N
+#endif
+/* SVE_VA_NUM_ARGS(...) yields the argument count (0 to 16): the arguments shift the descending list so the count lands on ma_N. Relies on the GNU ", ##__VA_ARGS__" extension. */
+#ifndef SVE_VA_NUM_ARGS
+#define SVE_VA_NUM_ARGS(...)                                              \
+    SVE_VA_NUM_ARGS_IMPL(0, ##__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, \
+                         8, 7, 6, 5, 4, 3, 2, 1, 0)
+#endif
+/* NOTE(review): "ma_LINEma_" is a plain identifier, not __LINE__, so the pasted name is identical on every line - confirm whether __LINE__ was intended. */
+#define SVE_SAFE_NAME(ma_NAME) SVE_CONNECT3(ma_, ma_NAME, ma_LINEma_)
+
+/* ---------------------------------------------------------------------------*
+ * SVE Test Helper                                                            *
+ * ---------------------------------------------------------------------------*/
+
+#define SVT_PRINT_VECTOR(ma_VECTOR, ma_ELEMENT_T, ma_FORMAT_STRING)             \
+    do { /* Debug helper: prints the argument text, then "[", each element and "]". */ \
+        size_t nElementCount = svcntb_pat(SV_ALL) / sizeof(ma_ELEMENT_T);       \
+        uint8_t SVE_SAFE_NAME(chVectorBuffer)                                   \
+            [nElementCount * sizeof(ma_ELEMENT_T)];                             \
+        /* size_t counter: int_fast8_t can be 8 bits wide and cannot count 128 or more lanes. */ \
+        svst1_u8(svptrue_b8(),                                                  \
+                 SVE_SAFE_NAME(chVectorBuffer),                                 \
+                 svreinterpret_u8(ma_VECTOR));                                  \
+        /* Elements are read back from the stored bytes and passed to printf as int. */ \
+        ma_ELEMENT_T *pElement = (ma_ELEMENT_T *)SVE_SAFE_NAME(chVectorBuffer); \
+        printf("%s\t[", #ma_VECTOR);                                            \
+        do {                                                                    \
+            printf(ma_FORMAT_STRING "\t", (int)*pElement++);                    \
+        } while (--nElementCount);                                              \
+        printf("]\r\n");                                                        \
+    } while (0)
+
+#define SVT_INIT_VECOTR(ma_VECTOR, ma_ELEMENT_T, ...)                               \
+    do { /* Test helper: loads ma_VECTOR from the listed element values; unlisted bytes are zero. */ \
+        uint8_t SVE_SAFE_NAME(chVectorBuffer)[svcntb_pat(SV_ALL)];                  \
+        /* Zero the staging bytes, then copy the initialisers, capped at the buffer size. */ \
+        memset(SVE_SAFE_NAME(chVectorBuffer), /* This should NOT be SDL_memset() */ \
+               0,                                                                   \
+               sizeof(SVE_SAFE_NAME(chVectorBuffer)));                              \
+        memcpy(SVE_SAFE_NAME(chVectorBuffer), /* This should NOT be SDL_memcpy() */ \
+               (ma_ELEMENT_T[]){ __VA_ARGS__ },                                     \
+               MIN(sizeof(SVE_SAFE_NAME(chVectorBuffer)),                           \
+                   sizeof((ma_ELEMENT_T[]){ __VA_ARGS__ })));                       \
+        /* NOTE(review): MIN is not defined in this header; "VECOTR" in the macro name looks like a typo of VECTOR. */ \
+        ma_VECTOR = svld1(svptrue_b8(),                                             \
+                          (ma_ELEMENT_T *)SVE_SAFE_NAME(chVectorBuffer));           \
+    } while (0)
+
+#define SVT_INIT_PRED(ma_PREDICT, ...)                                        \
+    do { /* Test helper: builds predicate ma_PREDICT from the raw byte values listed after it. */ \
+        uint8_t SVE_SAFE_NAME(chBuffer)[svlen(svundef_u64())];                \
+        memset(SVE_SAFE_NAME(chBuffer), /* This should NOT be SDL_memset() */ \
+               0,                                                             \
+               sizeof(SVE_SAFE_NAME(chBuffer)));                              \
+        /* Bytes not listed stay zero; the copy is capped at the buffer size (MIN comes from outside this header). */ \
+        memcpy(SVE_SAFE_NAME(chBuffer), /* This should NOT be SDL_memcpy() */ \
+               (uint8_t[]){ __VA_ARGS__ },                                    \
+               MIN(sizeof(SVE_SAFE_NAME(chBuffer)),                           \
+                   sizeof((uint8_t[]){ __VA_ARGS__ })));                      \
+        /* Reads the staged bytes back as an svbool_t. NOTE(review): assumes svlen(u64) bytes cover a whole predicate - confirm. */ \
+        ma_PREDICT = (*(svbool_t *)SVE_SAFE_NAME(chBuffer));                  \
+    } while (0)
+
+#define SVT_PRINT_PRED(ma_PREDICT, ma_TYPE_T)                                 \
+    do { /* Debug helper: prints the predicate as True/False columns, one per ma_TYPE_T-sized element. */ \
+        printf("%8s\t[", #ma_PREDICT);                                        \
+        uint16_t SVE_SAFE_NAME(hwBuffer)[svlen(svundef_u64()) / 2];           \
+        memset(SVE_SAFE_NAME(hwBuffer), /* This should NOT be SDL_memset() */ \
+               0,                                                             \
+               sizeof(SVE_SAFE_NAME(hwBuffer)));                              \
+        *(volatile svbool_t *)SVE_SAFE_NAME(hwBuffer) = (ma_PREDICT);         \
+        /* nTotalBits starts at the u8 lane count; nElementBits is sizeof(ma_TYPE_T), used as the bit step per element. */ \
+        uint_fast16_t SVE_SAFE_NAME(nTotalBits) = svlen(svundef_u8());        \
+        uint_fast8_t SVE_SAFE_NAME(nElementBits) = sizeof(ma_TYPE_T);         \
+                                                                              \
+        uint16_t *phwPred = SVE_SAFE_NAME(hwBuffer);                          \
+        do {                                                                  \
+            uint16_t hwPred = *phwPred++;                                     \
+            /* Each 16-bit word is consumed from bit 0 upward; only the lowest bit of each step is tested. */ \
+            for (uint_fast8_t n = 0;                                          \
+                 n < 16;                                                      \
+                 n += SVE_SAFE_NAME(nElementBits)) {                          \
+                                                                              \
+                if (hwPred & 0x01) {                                          \
+                    printf("True ");                                          \
+                } else {                                                      \
+                    printf("False");                                          \
+                }                                                             \
+                printf("%*s\t", (int)sizeof(ma_TYPE_T) - 1, "");              \
+                hwPred >>= SVE_SAFE_NAME(nElementBits);                       \
+            }                                                                 \
+            /* 16 predicate bits are accounted for per word read. */          \
+            SVE_SAFE_NAME(nTotalBits) -= 16;                                  \
+        } while (SVE_SAFE_NAME(nTotalBits));                                  \
+                                                                              \
+        printf("]\r\n");                                                      \
+    } while (0)
+
+#define SVT_PRINT_BUFFER(ma_BUFF_PTR, ma_SIZE, ma_TYPE_T, ma_FMT_STR, ma_STRIDE) \
+    do { /* Debug helper: prints ma_SIZE bytes at ma_BUFF_PTR as ma_TYPE_T values, ma_STRIDE values per line. */ \
+        ma_TYPE_T *pBuffer = (ma_TYPE_T *)(ma_BUFF_PTR);                         \
+        size_t nElementCount = (ma_SIZE) / sizeof(ma_TYPE_T);                    \
+        /* The pointer argument is parenthesised so an expression such as "p + 1" is cast as a whole. */ \
+        size_t nStrideSize = (ma_STRIDE);                                        \
+        size_t nLineCount = 0;                                                   \
+                                                                                 \
+        printf("%s\n\t", #ma_BUFF_PTR);                                          \
+        while (nElementCount--) {                                                \
+            /* Pre-tested loop: a size below one element prints no values instead of wrapping the counter. */ \
+            printf(ma_FMT_STR " ", *pBuffer++);                                  \
+            nLineCount++;                                                        \
+            if (nLineCount >= nStrideSize) {                                     \
+                nLineCount = 0;                                                  \
+                printf("\n\t");                                                  \
+            }                                                                    \
+                                                                                 \
+        }                                                                        \
+        printf("\n");                                                            \
+                                                                                 \
+    } while (0)
+
+#endif /* SDL_SVE2_UTIL_H */
\ No newline at end of file
diff --git a/test/testplatform.c b/test/testplatform.c
index 4e79f6326c..d42c72d10f 100644
--- a/test/testplatform.c
+++ b/test/testplatform.c
@@ -414,6 +414,7 @@ static int TestCPUInfo(bool verbose)
         SDL_Log("NEON %s", SDL_HasNEON() ? "detected" : "not detected");
         SDL_Log("LSX %s", SDL_HasLSX() ? "detected" : "not detected");
         SDL_Log("LASX %s", SDL_HasLASX() ? "detected" : "not detected");
+        SDL_Log("SVE2 %s", SDL_HasSVE2() ? "detected" : "not detected");
         SDL_Log("System RAM %d MB", SDL_GetSystemRAM());
         SDL_Log("System memory page size %d bytes", SDL_GetSystemPageSize());
     }