From 4613ac3c15fd75cebc4b9f65b7fb95e70a3acce1 Mon Sep 17 00:00:00 2001
From: Henrik Gramner <henrik@gramner.com>
Date: Sun, 12 May 2024 22:29:20 +0200
Subject: [PATCH] x86inc: Improve ELF PIC support for external function calls

PLT/GOT indirections are required in some cases. Most commonly when
calling functions from other shared libraries, but also in some
scenarios when calling functions with default symbol visibility
even within the same component on certain elf64 platforms.

On elf64 we can simply use PLT relocations for all calls to external
functions. Since the linker is able to eliminate unnecessary PLT
indirections, with the final output binary being identical to one
using non-PLT relocations, there isn't really any downside to doing
so. This mimics what regular compilers normally do for calls to
external functions.

On elf32 with PIC we can use a function pointer from the GOT when
calling external functions, similar to what regular compilers do when
using -fno-plt. Since this both introduces overhead and clobbers one
register, which could potentially have been used for custom calling
conventions when calling other asm functions within the same library,
it's only performed for functions declared using 'cextern_naked'.
---
 common/x86/x86inc.asm | 25 ++++++++++++++++++++-----
 tools/checkasm-a.asm  |  4 ----
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index 0f89d5f5..486bad68 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -239,7 +239,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
 %elif PIC
     call $+5 ; special-cased to not affect the RSB on most CPU:s
     pop %1
-    add %1, (%2)-$+1
+    add %1, -$+1+%2
 %else
     mov %1, %2
 %endif
@@ -873,16 +873,16 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
 
 %macro cextern 1
     %xdefine %1 mangle(private_prefix %+ _ %+ %1)
-    CAT_XDEFINE cglobaled_, %1, 1
+    CAT_XDEFINE cglobaled_, %1, 2 ; 2 = cextern; call_internal uses PLT on elf64 for values >= 2
     extern %1
 %endmacro
 
-; like cextern, but without the prefix
+; Like cextern, but without the prefix. This should be used for symbols from external libraries.
 %macro cextern_naked 1
     %ifdef PREFIX
         %xdefine %1 mangle(%1)
     %endif
-    CAT_XDEFINE cglobaled_, %1, 1
+    CAT_XDEFINE cglobaled_, %1, 3 ; 3 = cextern_naked; call_internal uses the GOT on elf32 PIC
     extern %1
 %endmacro
 
@@ -1277,12 +1277,27 @@ INIT_XMM
 %endmacro
 %macro call_internal 2
     %xdefine %%i %2
+    %define %%j %%i ; call operand; lazy define, so it follows a later redefinition of %%i
     %ifndef cglobaled_%2
         %ifdef cglobaled_%1
             %xdefine %%i %1
         %endif
+    %elif FORMAT_ELF
+        %if ARCH_X86_64
+            %if cglobaled_%2 >= 2
+                ; Always emit PLT relocations when calling external functions,
+                ; the linker will eliminate unnecessary PLT indirections anyway.
+                %define %%j %%i wrt ..plt
+            %endif
+        %elif PIC && cglobaled_%2 == 3
+            ; Go through the GOT for functions declared using cextern_naked with
+            ; PIC, as such functions presumably exist in external libraries.
+            extern _GLOBAL_OFFSET_TABLE_
+            LEA eax, $$+_GLOBAL_OFFSET_TABLE_ wrt ..gotpc ; clobbers eax
+            %define %%j [eax+%%i wrt ..got]
+        %endif
     %endif
-    call %%i
+    call %%j
     LOAD_MM_PERMUTATION %%i
 %endmacro
 
diff --git a/tools/checkasm-a.asm b/tools/checkasm-a.asm
index bd600cf2..4d8f3c64 100644
--- a/tools/checkasm-a.asm
+++ b/tools/checkasm-a.asm
@@ -152,11 +152,7 @@ cglobal checkasm_call, 2,15,16,-1*(((max_args+1)*8+STACK_ALIGNMENT-1) & ~(STACK_
     mov  r9, rax
     mov r10, rdx
     lea  r0, [error_message]
-%if FORMAT_ELF
-    call puts wrt ..plt
-%else
     call puts
-%endif
     mov  r1, [rsp+max_args*8]
     mov  dword [r1], 0
     mov  rdx, r10
-- 
GitLab