From 92b8513377eaff10438cf6a8b8a5fdb09862c0e5 Mon Sep 17 00:00:00 2001
From: Pranav Kant <prka@google.com>
Date: Tue, 11 Mar 2025 19:54:48 +0000
Subject: [PATCH] Mark C globals with assembly reference to small code model

By default, all globals in C/C++ compiled by clang are allocated
in non-large data sections. See [1] for background on code models.
For PIC (position-independent code), this is fine as long as the binary is
small, but as binary size increases, users may want to use medium/large
code models (-mcmodel=medium), which move data into large sections.
As data in these large sections can no longer be accessed using PIC code
(as it's too far away), the compiler ends up using a different
instruction sequence when building C/C++ code -- using the GOT to access
these globals (which can be relaxed by the linker at link time if the binary
ends up being small). However, assembly files continue to access these
globals defined in C/C++ files using the older (and now invalid) instruction
sequence. So, we mark all such globals with an attribute that forces
them to be allocated in small sections, allowing them to be validly
accessed from the assembly code.

This patch should not have any effect on builds that use the small code
model, which is the default mode.

[1] https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models

There is a similar patch in dav1d, which inspired this patch:
https://code.videolan.org/videolan/dav1d/-/merge_requests/1785
---
 common/osdep.h  | 15 +++++++++++++++
 common/tables.h | 35 ++++++++++++++++++-----------------
 encoder/rdo.c   |  4 ++--
 3 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/common/osdep.h b/common/osdep.h
index 23739b7a..d7373613 100644
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -285,6 +285,9 @@ static inline int x264_is_regular_file( FILE *filehandle )
 #define DECLARE_ALIGNED( var, n ) var __attribute__((aligned(n)))
 #endif
 
+// Use DECLARE_ASM to declare variables that are accessed from hardcoded assembly
+#define DECLARE_ASM( var ) var ATTR_MCMODEL_SMALL
+
 #define ALIGNED_4( var )  DECLARE_ALIGNED( var, 4 )
 #define ALIGNED_8( var )  DECLARE_ALIGNED( var, 8 )
 #define ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )
@@ -363,6 +366,18 @@ static inline int x264_is_regular_file( FILE *filehandle )
 #define x264_nonconstant_p(x) 0
 #endif
 
+#ifdef __has_attribute
+#define HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define HAS_ATTRIBUTE(x) 0
+#endif
+
+#if ARCH_X86_64 && defined(__ELF__) && HAS_ATTRIBUTE(model)
+#define ATTR_MCMODEL_SMALL __attribute__((model("small")))
+#else
+#define ATTR_MCMODEL_SMALL
+#endif
+
 /* threads */
 #if HAVE_BEOSTHREAD
 #include <kernel/OS.h>
diff --git a/common/tables.h b/common/tables.h
index b081908d..7549886a 100644
--- a/common/tables.h
+++ b/common/tables.h
@@ -27,6 +27,7 @@
 #ifndef X264_TABLES_H
 #define X264_TABLES_H
 
+#include "osdep.h"
 typedef struct
 {
     uint8_t i_bits;
@@ -67,28 +68,28 @@ extern const uint8_t x264_cqm_avci300_2160p_4iy[16];
 extern const uint8_t x264_cqm_avci300_2160p_4ic[16];
 extern const uint8_t x264_cqm_avci300_2160p_8iy[64];
 
-extern const uint8_t x264_decimate_table4[16];
-extern const uint8_t x264_decimate_table8[64];
+extern DECLARE_ASM(const uint8_t x264_decimate_table4[16]);
+extern DECLARE_ASM(const uint8_t x264_decimate_table8[64]);
 
-extern const uint32_t x264_dct4_weight_tab[16];
-extern const uint32_t x264_dct8_weight_tab[64];
-extern const uint32_t x264_dct4_weight2_tab[16];
-extern const uint32_t x264_dct8_weight2_tab[64];
+extern DECLARE_ASM(const uint32_t x264_dct4_weight_tab[16]);
+extern DECLARE_ASM(const uint32_t x264_dct8_weight_tab[64]);
+extern DECLARE_ASM(const uint32_t x264_dct4_weight2_tab[16]);
+extern DECLARE_ASM(const uint32_t x264_dct8_weight2_tab[64]);
 
 extern const int8_t   x264_cabac_context_init_I[1024][2];
 extern const int8_t   x264_cabac_context_init_PB[3][1024][2];
-extern const uint8_t  x264_cabac_range_lps[64][4];
-extern const uint8_t  x264_cabac_transition[128][2];
-extern const uint8_t  x264_cabac_renorm_shift[64];
-extern const uint16_t x264_cabac_entropy[128];
+extern DECLARE_ASM(const uint8_t  x264_cabac_range_lps[64][4]);
+extern DECLARE_ASM(const uint8_t  x264_cabac_transition[128][2]);
+extern DECLARE_ASM(const uint8_t  x264_cabac_renorm_shift[64]);
+extern DECLARE_ASM(const uint16_t x264_cabac_entropy[128]);
 
-extern const uint8_t  x264_significant_coeff_flag_offset_8x8[2][64];
-extern const uint8_t  x264_last_coeff_flag_offset_8x8[63];
-extern const uint8_t  x264_coeff_flag_offset_chroma_422_dc[7];
-extern const uint16_t x264_significant_coeff_flag_offset[2][16];
-extern const uint16_t x264_last_coeff_flag_offset[2][16];
-extern const uint16_t x264_coeff_abs_level_m1_offset[16];
-extern const uint8_t  x264_count_cat_m1[14];
+extern DECLARE_ASM(const uint8_t  x264_significant_coeff_flag_offset_8x8[2][64]);
+extern DECLARE_ASM(const uint8_t  x264_last_coeff_flag_offset_8x8[63]);
+extern DECLARE_ASM(const uint8_t  x264_coeff_flag_offset_chroma_422_dc[7]);
+extern DECLARE_ASM(const uint16_t x264_significant_coeff_flag_offset[2][16]);
+extern DECLARE_ASM(const uint16_t x264_last_coeff_flag_offset[2][16]);
+extern DECLARE_ASM(const uint16_t x264_coeff_abs_level_m1_offset[16]);
+extern DECLARE_ASM(const uint8_t  x264_count_cat_m1[14]);
 
 extern const vlc_t x264_coeff0_token[6];
 extern const vlc_t x264_coeff_token[6][16][4];
diff --git a/encoder/rdo.c b/encoder/rdo.c
index 9fc67610..89fa8975 100644
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -33,9 +33,9 @@
 /* Transition and size tables for abs<9 MVD and residual coding */
 /* Consist of i_prefix-2 1s, one zero, and a bypass sign bit */
 #define x264_cabac_transition_unary x264_template(cabac_transition_unary)
-uint8_t x264_cabac_transition_unary[15][128];
+DECLARE_ASM(uint8_t x264_cabac_transition_unary[15][128]);
 #define x264_cabac_size_unary x264_template(cabac_size_unary)
-uint16_t x264_cabac_size_unary[15][128];
+DECLARE_ASM(uint16_t x264_cabac_size_unary[15][128]);
 /* Transition and size tables for abs>9 MVD */
 /* Consist of 5 1s and a bypass sign bit */
 static uint8_t cabac_transition_5ones[128];
-- 
GitLab