summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFangrui Song <i@maskray.me>2024-02-20 13:53:29 -0800
committerGitHub <noreply@github.com>2024-02-20 13:53:29 -0800
commitce1484089c1271b386a7fe720194796b6263244f (patch)
tree8de6c34e5ce03b1da25588bb8e060e5d2adf141f
parent819ebcf2bbc3dfc80f949d4bfebcd1cb797e3a01 (diff)
[ELF] Support placing .lbss/.lrodata/.ldata after .bss (#81224)upstream/users/MaskRay/spr/main.elf-place-lbsslrodataldata-after-bss
https://reviews.llvm.org/D150510 places .lrodata before .rodata to minimize the number of permission transitions in the memory image. However, this layout is less ideal for -fno-pic code (which is still important). Small code model -fno-pic code has R_X86_64_32S relocations with a range of `[0,2**31)` (if we ignore the negative area). Placing `.lrodata` earlier exerts relocation pressure on such code. Non-x86 64-bit architectures generally have a similar `[0,2**31)` limitation if they don't use PC-relative relocations. If we place .lrodata later, we will need one extra PT_LOAD. Two layouts are appealing: * .bss/.lbss/.lrodata/.ldata (GNU ld) * .bss/.ldata/.lbss/.lrodata The GNU ld layout has the nice property that there is only one BSS (except .tbss/.relro_padding). Add -z lrodata-after-bss to support this layout. Since a read-only PT_LOAD segment (for large data sections) may appear after RW PT_LOAD segments, the placement of `_etext` has to be adjusted.
-rw-r--r--lld/ELF/Config.h1
-rw-r--r--lld/ELF/Driver.cpp2
-rw-r--r--lld/ELF/Writer.cpp47
-rw-r--r--lld/docs/ld.lld.13
-rw-r--r--lld/test/ELF/lto/codemodel.ll8
-rw-r--r--lld/test/ELF/x86-64-section-layout.s38
6 files changed, 77 insertions, 22 deletions
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index fcca8c42b29b..691ebfc07432 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -310,6 +310,7 @@ struct Config {
bool zInitfirst;
bool zInterpose;
bool zKeepTextSectionPrefix;
+ bool zLrodataAfterBss;
bool zNodefaultlib;
bool zNodelete;
bool zNodlopen;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 4bb9b7a0b2a9..24faa1753f1e 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1436,6 +1436,8 @@ static void readConfigs(opt::InputArgList &args) {
config->zInterpose = hasZOption(args, "interpose");
config->zKeepTextSectionPrefix = getZFlag(
args, "keep-text-section-prefix", "nokeep-text-section-prefix", false);
+ config->zLrodataAfterBss =
+ getZFlag(args, "lrodata-after-bss", "nolrodata-after-bss", false);
config->zNodefaultlib = hasZOption(args, "nodefaultlib");
config->zNodelete = hasZOption(args, "nodelete");
config->zNodlopen = hasZOption(args, "nodlopen");
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 5b7dfd358e76..0bbf43ddf694 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -911,11 +911,12 @@ enum RankFlags {
RF_NOT_ALLOC = 1 << 26,
RF_PARTITION = 1 << 18, // Partition number (8 bits)
RF_NOT_SPECIAL = 1 << 17,
- RF_WRITE = 1 << 16,
- RF_EXEC_WRITE = 1 << 15,
- RF_EXEC = 1 << 14,
- RF_RODATA = 1 << 13,
- RF_LARGE = 1 << 12,
+ RF_LARGE_ALT = 1 << 15,
+ RF_WRITE = 1 << 14,
+ RF_EXEC_WRITE = 1 << 13,
+ RF_EXEC = 1 << 12,
+ RF_RODATA = 1 << 11,
+ RF_LARGE = 1 << 10,
RF_NOT_RELRO = 1 << 9,
RF_NOT_TLS = 1 << 8,
RF_BSS = 1 << 7,
@@ -974,8 +975,14 @@ static unsigned getSectionRank(OutputSection &osec) {
if (osec.type == SHT_PROGBITS)
rank |= RF_RODATA;
// Among PROGBITS sections, place .lrodata further from .text.
- if (!(osec.flags & SHF_X86_64_LARGE && config->emachine == EM_X86_64))
- rank |= RF_LARGE;
+ // For -z lrodata-after-bss, place .lrodata after .lbss like GNU ld. This
+ // layout has one extra PT_LOAD, but alleviates relocation overflow
+ // pressure for absolute relocations referencing small data from -fno-pic
+ // relocatable files.
+ if (osec.flags & SHF_X86_64_LARGE && config->emachine == EM_X86_64)
+ rank |= config->zLrodataAfterBss ? RF_LARGE_ALT : 0;
+ else
+ rank |= config->zLrodataAfterBss ? 0 : RF_LARGE;
} else if (isExec) {
rank |= isWrite ? RF_EXEC_WRITE : RF_EXEC;
} else {
@@ -988,10 +995,15 @@ static unsigned getSectionRank(OutputSection &osec) {
osec.relro = true;
else
rank |= RF_NOT_RELRO;
- // Place .ldata and .lbss after .bss. Making .bss closer to .text alleviates
- // relocation overflow pressure.
- if (osec.flags & SHF_X86_64_LARGE && config->emachine == EM_X86_64)
- rank |= RF_LARGE;
+ // Place .ldata and .lbss after .bss. Making .bss closer to .text
+ // alleviates relocation overflow pressure.
+ // For -z lrodata-after-bss, place .lbss/.lrodata/.ldata after .bss.
+ // .bss/.lbss being adjacent reuses the NOBITS size optimization.
+ if (osec.flags & SHF_X86_64_LARGE && config->emachine == EM_X86_64) {
+ rank |= config->zLrodataAfterBss
+ ? (osec.type == SHT_NOBITS ? 1 : RF_LARGE_ALT)
+ : RF_LARGE;
+ }
}
// Within TLS sections, or within other RelRo sections, or within non-RelRo
@@ -1103,7 +1115,7 @@ template <class ELFT> void Writer<ELFT>::setReservedSymbolSections() {
}
PhdrEntry *last = nullptr;
- PhdrEntry *lastRO = nullptr;
+ OutputSection *lastRO = nullptr;
auto isLarge = [](OutputSection *osec) {
return config->emachine == EM_X86_64 && osec->flags & SHF_X86_64_LARGE;
};
@@ -1112,17 +1124,18 @@ template <class ELFT> void Writer<ELFT>::setReservedSymbolSections() {
if (p->p_type != PT_LOAD)
continue;
last = p;
- if (!(p->p_flags & PF_W))
- lastRO = p;
+ if (!(p->p_flags & PF_W) && p->lastSec && !isLarge(p->lastSec))
+ lastRO = p->lastSec;
}
}
if (lastRO) {
- // _etext is the first location after the last read-only loadable segment.
+ // _etext is the first location after the last read-only loadable segment
+ // that does not contain large sections.
if (ElfSym::etext1)
- ElfSym::etext1->section = lastRO->lastSec;
+ ElfSym::etext1->section = lastRO;
if (ElfSym::etext2)
- ElfSym::etext2->section = lastRO->lastSec;
+ ElfSym::etext2->section = lastRO;
}
if (last) {
diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index 12b17dd37796..e4d39e47f5c5 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -791,6 +791,9 @@ flag to indicate to the runtime linker that the object is an interposer.
During symbol resolution interposers are searched after the application
but before other dependencies.
.Pp
+.It Cm lrodata-after-bss
+Place .lrodata after .bss.
+.Pp
.It Cm muldefs
Do not error if a symbol is defined multiple times.
The first definition will be used.
diff --git a/lld/test/ELF/lto/codemodel.ll b/lld/test/ELF/lto/codemodel.ll
index a35f87729411..cf7d0e409ec4 100644
--- a/lld/test/ELF/lto/codemodel.ll
+++ b/lld/test/ELF/lto/codemodel.ll
@@ -2,8 +2,8 @@
; RUN: llvm-as %s -o %t.o
; RUN: ld.lld %t.o -o %ts -mllvm -code-model=small
; RUN: ld.lld %t.o -o %tl -mllvm -code-model=large
-; RUN: llvm-objdump --no-print-imm-hex -d %ts | FileCheck %s --check-prefix=CHECK-SMALL
-; RUN: llvm-objdump --no-print-imm-hex -d %tl | FileCheck %s --check-prefix=CHECK-LARGE
+; RUN: llvm-objdump -d %ts | FileCheck %s --check-prefix=CHECK-SMALL
+; RUN: llvm-objdump -d %tl | FileCheck %s --check-prefix=CHECK-LARGE
target triple = "x86_64-unknown-linux-gnu"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
@@ -13,8 +13,8 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
define ptr @_start() nounwind readonly {
entry:
; CHECK-SMALL-LABEL: <_start>:
-; CHECK-SMALL: movl $2097440, %eax
+; CHECK-SMALL: movl ${{.*}}, %eax
; CHECK-LARGE-LABEL: <_start>:
-; CHECK-LARGE: movabsq $2097440, %rax
+; CHECK-LARGE: movabsq ${{.*}}, %rax
ret ptr @data
}
diff --git a/lld/test/ELF/x86-64-section-layout.s b/lld/test/ELF/x86-64-section-layout.s
index 0ba605393893..b03d3e6c2b99 100644
--- a/lld/test/ELF/x86-64-section-layout.s
+++ b/lld/test/ELF/x86-64-section-layout.s
@@ -12,9 +12,12 @@
# RUN: ld.lld --section-start=.note=0x200300 a1.o -o a1
# RUN: llvm-readelf -S -sX a1 | FileCheck %s --check-prefix=CHECK1
-# RUN: ld.lld -T b.lds -z norelro a.o -o b
+# RUN: ld.lld -T b.lds -z norelro a.o -z lrodata-after-bss -z nolrodata-after-bss -o b --fatal-warnings
# RUN: llvm-readelf -S -l b | FileCheck %s --check-prefix=CHECK2
+# RUN: ld.lld --section-start=.note=0x200300 a.o -z lrodata-after-bss -o a3
+# RUN: llvm-readelf -S -l -sX a3 | FileCheck %s --check-prefix=CHECK3
+
# CHECK: Name Type Address Off Size ES Flg Lk Inf Al
# CHECK-NEXT: NULL 0000000000000000 000000 000000 00 0 0 0
# CHECK-NEXT: .note NOTE 0000000000200300 000300 000001 00 A 0 0 1
@@ -80,6 +83,39 @@
# CHECK2-NEXT: LOAD 0x000305 0x0000000000200305 0x0000000000200305 0x001805 0x002a06 RW 0x1000
# CHECK2-NEXT: TLS 0x000305 0x0000000000200305 0x0000000000200305 0x000001 0x000003 R 0x1
+# CHECK3: Name Type Address Off Size ES Flg Lk Inf Al
+# CHECK3-NEXT: NULL 0000000000000000 000000 000000 00 0 0 0
+# CHECK3-NEXT: .note NOTE 0000000000200300 000300 000001 00 A 0 0 1
+# CHECK3-NEXT: .rodata PROGBITS 0000000000200301 000301 000001 00 A 0 0 1
+# CHECK3-NEXT: .text PROGBITS 0000000000201304 000304 000001 00 AX 0 0 4
+# CHECK3-NEXT: .tdata PROGBITS 0000000000202305 000305 000001 00 WAT 0 0 1
+# CHECK3-NEXT: .tbss NOBITS 0000000000202306 000306 000002 00 WAT 0 0 1
+# CHECK3-NEXT: .relro_padding NOBITS 0000000000202306 000306 000cfa 00 WA 0 0 1
+# CHECK3-NEXT: .data PROGBITS 0000000000203306 000306 000001 00 WA 0 0 1
+# CHECK3-NEXT: .bss NOBITS 0000000000203307 000307 001800 00 WA 0 0 1
+## We spend (size(.bss) + size(.lbss)) % MAXPAGESIZE bytes.
+# CHECK3-NEXT: .lbss NOBITS 0000000000204b07 000307 001201 00 WAl 0 0 1
+# CHECK3-NEXT: .lrodata PROGBITS 0000000000206d08 000d08 000002 00 Al 0 0 1
+# CHECK3-NEXT: .ldata PROGBITS 0000000000207d0a 000d0a 000002 00 WAl 0 0 1
+# CHECK3-NEXT: .ldata2 PROGBITS 0000000000207d0c 000d0c 000001 00 WAl 0 0 1
+# CHECK3-NEXT: .comment PROGBITS 0000000000000000 000d0d {{.*}} 01 MS 0 0 1
+
+# CHECK3: Program Headers:
+# CHECK3-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
+# CHECK3-NEXT: PHDR 0x000040 0x0000000000200040 0x0000000000200040 {{.*}} {{.*}} R 0x8
+# CHECK3-NEXT: LOAD 0x000000 0x0000000000200000 0x0000000000200000 0x000302 0x000302 R 0x1000
+# CHECK3-NEXT: LOAD 0x000304 0x0000000000201304 0x0000000000201304 0x000001 0x000001 R E 0x1000
+# CHECK3-NEXT: LOAD 0x000305 0x0000000000202305 0x0000000000202305 0x000001 0x000cfb RW 0x1000
+# CHECK3-NEXT: LOAD 0x000306 0x0000000000203306 0x0000000000203306 0x000001 0x002a02 RW 0x1000
+# CHECK3-NEXT: LOAD 0x000d08 0x0000000000206d08 0x0000000000206d08 0x000002 0x000002 R 0x1000
+# CHECK3-NEXT: LOAD 0x000d0a 0x0000000000207d0a 0x0000000000207d0a 0x000003 0x000003 RW 0x1000
+# CHECK3-NEXT: TLS 0x000305 0x0000000000202305 0x0000000000202305 0x000001 0x000003 R 0x1
+
+# CHECK3: 0000000000201304 0 NOTYPE GLOBAL DEFAULT [[#]] (.text) _start
+# CHECK3-NEXT: 0000000000201305 0 NOTYPE GLOBAL DEFAULT [[#]] (.text) _etext
+# CHECK3-NEXT: 0000000000203307 0 NOTYPE GLOBAL DEFAULT [[#]] (.data) _edata
+# CHECK3-NEXT: 0000000000207d0d 0 NOTYPE GLOBAL DEFAULT [[#]] (.ldata2) _end
+
#--- a.s
.globl _start, _etext, _edata, _end
_start: