From 570f6c70808287fc78e3f8f5372a095ec6ef7878 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Fri, 28 Feb 2025 18:22:37 +0200
Subject: [PATCH] aarch64: Add runtime detection of extensions on Windows and
 macOS

---
 common/cpu.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/common/cpu.c b/common/cpu.c
index f182cfde..31506264 100644
--- a/common/cpu.c
+++ b/common/cpu.c
@@ -484,6 +484,60 @@ static uint32_t detect_flags( void )
 
     return flags;
 }
+
+#elif defined(__APPLE__)
+#include <sys/sysctl.h>
+
+static int have_feature( const char *feature )
+{
+    int supported = 0;
+    size_t size = sizeof(supported);
+    if ( sysctlbyname( feature, &supported, &size, NULL, 0 ) )
+        return 0;
+    return supported;
+}
+
+static uint32_t detect_flags( void )
+{
+    uint32_t flags = 0;
+
+    if ( have_feature( "hw.optional.arm.FEAT_DotProd" ) )
+        flags |= X264_CPU_DOTPROD;
+    if ( have_feature( "hw.optional.arm.FEAT_I8MM" ) )
+        flags |= X264_CPU_I8MM;
+    /* No SVE and SVE2 feature detection available on Apple platforms. */
+    return flags;
+}
+
+#elif defined(_WIN32)
+#include <windows.h>
+
+static uint32_t detect_flags( void )
+{
+    uint32_t flags = 0;
+
+#ifdef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
+    if ( IsProcessorFeaturePresent( PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE ) )
+        flags |= X264_CPU_DOTPROD;
+#endif
+#ifdef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE
+    if ( IsProcessorFeaturePresent( PF_ARM_SVE_INSTRUCTIONS_AVAILABLE ) )
+        flags |= X264_CPU_SVE;
+#endif
+#ifdef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
+    if ( IsProcessorFeaturePresent( PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE ) )
+        flags |= X264_CPU_SVE2;
+#endif
+#ifdef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE
+    /* There's no PF_* flag that indicates whether plain I8MM is available
+     * or not. But if SVE_I8MM is available, that also implies that
+     * regular I8MM is available. */
+    if ( IsProcessorFeaturePresent( PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE ) )
+        flags |= X264_CPU_I8MM;
+#endif
+    return flags;
+}
+
 #endif
 
 uint32_t x264_cpu_detect( void )
@@ -509,7 +563,8 @@ uint32_t x264_cpu_detect( void )
 #endif
 
     // Where possible, try to do runtime detection as well.
-#if defined(__linux__) || HAVE_ELF_AUX_INFO
+#if defined(__linux__) || HAVE_ELF_AUX_INFO || \
+    defined(__APPLE__) || defined(_WIN32)
     flags |= detect_flags();
 #endif
 
-- 
GitLab