Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit f949c9f

Browse files
committed Mar 14, 2025
Auto merge of rust-lang#138504 - bjorn3:string_merging_rust_strings, r=<try>
Nul-terminate Rust string literals. This allows taking advantage of the C string merging functionality of linkers, reducing code size. Marked as draft to see if this actually has much of an effect. The disadvantage of this is that people may start to rely on string literals being nul-terminated. A potential solution for that would be to put a byte that is not part of a valid UTF-8 character right before the nul terminator. Builds on rust-lang#138503.
2 parents f7b4354 + 7c1cd9b commit f949c9f

File tree

12 files changed

+72
-54
lines changed

12 files changed

+72
-54
lines changed
 

‎compiler/rustc_codegen_gcc/src/consts.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -364,7 +364,7 @@ pub fn const_alloc_to_gcc<'gcc>(
364364
llvals.push(cx.const_bytes(bytes));
365365
}
366366

367-
cx.const_struct(&llvals, true)
367+
if let &[data] = &*llvals { data } else { cx.const_struct(&llvals, true) }
368368
}
369369

370370
fn codegen_static_initializer<'gcc, 'tcx>(

‎compiler/rustc_codegen_llvm/src/consts.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ pub(crate) fn const_alloc_to_llvm<'ll>(
128128
append_chunks_of_init_and_uninit_bytes(&mut llvals, cx, alloc, range);
129129
}
130130

131-
cx.const_struct(&llvals, true)
131+
if let &[data] = &*llvals { data } else { cx.const_struct(&llvals, true) }
132132
}
133133

134134
fn codegen_static_initializer<'ll, 'tcx>(

‎compiler/rustc_const_eval/src/interpret/intrinsics.rs

+12-5
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,17 @@ use super::{
2424
use crate::fluent_generated as fluent;
2525

2626
/// Directly returns an `Allocation` containing an absolute path representation of the given type.
27-
pub(crate) fn alloc_type_name<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> ConstAllocation<'tcx> {
28-
let path = crate::util::type_name(tcx, ty);
27+
pub(crate) fn alloc_type_name<'tcx>(
28+
tcx: TyCtxt<'tcx>,
29+
ty: Ty<'tcx>,
30+
) -> (ConstAllocation<'tcx>, u64) {
31+
let mut path = crate::util::type_name(tcx, ty);
32+
let path_len = path.len().try_into().unwrap();
33+
if !path.contains('\0') {
34+
path.push('\0');
35+
};
2936
let alloc = Allocation::from_bytes_byte_aligned_immutable(path.into_bytes());
30-
tcx.mk_const_alloc(alloc)
37+
(tcx.mk_const_alloc(alloc), path_len)
3138
}
3239

3340
/// The logic for all nullary intrinsics is implemented here. These intrinsics don't get evaluated
@@ -43,8 +50,8 @@ pub(crate) fn eval_nullary_intrinsic<'tcx>(
4350
interp_ok(match name {
4451
sym::type_name => {
4552
ensure_monomorphic_enough(tcx, tp_ty)?;
46-
let alloc = alloc_type_name(tcx, tp_ty);
47-
ConstValue::Slice { data: alloc, meta: alloc.inner().size().bytes() }
53+
let (alloc, path_len) = alloc_type_name(tcx, tp_ty);
54+
ConstValue::Slice { data: alloc, meta: path_len }
4855
}
4956
sym::needs_drop => {
5057
ensure_monomorphic_enough(tcx, tp_ty)?;

‎compiler/rustc_const_eval/src/interpret/place.rs

+8-3
Original file line numberDiff line numberDiff line change
@@ -1019,11 +1019,16 @@ where
10191019
&mut self,
10201020
s: &str,
10211021
) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> {
1022-
let bytes = s.as_bytes();
1023-
let ptr = self.allocate_bytes_dedup(bytes)?;
1022+
let ptr = if !s.contains('\0') {
1023+
let mut bytes = s.as_bytes().to_owned();
1024+
bytes.push(0);
1025+
self.allocate_bytes_dedup(&bytes)?
1026+
} else {
1027+
self.allocate_bytes_dedup(s.as_bytes())?
1028+
};
10241029

10251030
// Create length metadata for the string.
1026-
let meta = Scalar::from_target_usize(u64::try_from(bytes.len()).unwrap(), self);
1031+
let meta = Scalar::from_target_usize(u64::try_from(s.len()).unwrap(), self);
10271032

10281033
// Get layout for Rust's str type.
10291034
let layout = self.layout_of(self.tcx.types.str_).unwrap();

‎compiler/rustc_mir_build/src/builder/expr/as_constant.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,15 @@ fn lit_to_mir_constant<'tcx>(tcx: TyCtxt<'tcx>, lit_input: LitToConstInput<'tcx>
125125
let value = match (lit, lit_ty.kind()) {
126126
(ast::LitKind::Str(s, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_str() => {
127127
let s = s.as_str();
128-
let allocation = Allocation::from_bytes_byte_aligned_immutable(s.as_bytes());
128+
let allocation = if !s.contains('\0') {
129+
let mut s = s.to_owned();
130+
s.push('\0');
131+
Allocation::from_bytes_byte_aligned_immutable(s.as_bytes())
132+
} else {
133+
Allocation::from_bytes_byte_aligned_immutable(s.as_bytes())
134+
};
129135
let allocation = tcx.mk_const_alloc(allocation);
130-
ConstValue::Slice { data: allocation, meta: allocation.inner().size().bytes() }
136+
ConstValue::Slice { data: allocation, meta: s.len().try_into().unwrap() }
131137
}
132138
(ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _))
133139
if matches!(inner_ty.kind(), ty::Slice(_)) =>

‎tests/codegen/debug-vtable.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
// Make sure that vtables don't have the unnamed_addr attribute when debuginfo is enabled.
1717
// This helps debuggers more reliably map from dyn pointer to concrete type.
18-
// CHECK: @vtable.2 = private constant <{
18+
// CHECK: @vtable.2 = private constant [
1919
// CHECK: @vtable.3 = private constant <{
2020
// CHECK: @vtable.4 = private constant <{
2121

‎tests/codegen/external-no-mangle-statics.rs

+16-16
Original file line numberDiff line numberDiff line change
@@ -6,72 +6,72 @@
66
// `#[no_mangle]`d static variables always have external linkage, i.e., no `internal` in their
77
// definitions
88

9-
// CHECK: @A = {{(dso_local )?}}local_unnamed_addr constant
9+
// CHECK-DAG: @A = {{(dso_local )?}}local_unnamed_addr constant
1010
#[no_mangle]
1111
static A: u8 = 0;
1212

13-
// CHECK: @B = {{(dso_local )?}}local_unnamed_addr global
13+
// CHECK-DAG: @B = {{(dso_local )?}}local_unnamed_addr global
1414
#[no_mangle]
1515
static mut B: u8 = 0;
1616

17-
// CHECK: @C = {{(dso_local )?}}local_unnamed_addr constant
17+
// CHECK-DAG: @C = {{(dso_local )?}}local_unnamed_addr constant
1818
#[no_mangle]
1919
pub static C: u8 = 0;
2020

21-
// CHECK: @D = {{(dso_local )?}}local_unnamed_addr global
21+
// CHECK-DAG: @D = {{(dso_local )?}}local_unnamed_addr global
2222
#[no_mangle]
2323
pub static mut D: u8 = 0;
2424

2525
mod private {
26-
// CHECK: @E = {{(dso_local )?}}local_unnamed_addr constant
26+
// CHECK-DAG: @E = {{(dso_local )?}}local_unnamed_addr constant
2727
#[no_mangle]
2828
static E: u8 = 0;
2929

30-
// CHECK: @F = {{(dso_local )?}}local_unnamed_addr global
30+
// CHECK-DAG: @F = {{(dso_local )?}}local_unnamed_addr global
3131
#[no_mangle]
3232
static mut F: u8 = 0;
3333

34-
// CHECK: @G = {{(dso_local )?}}local_unnamed_addr constant
34+
// CHECK-DAG: @G = {{(dso_local )?}}local_unnamed_addr constant
3535
#[no_mangle]
3636
pub static G: u8 = 0;
3737

38-
// CHECK: @H = {{(dso_local )?}}local_unnamed_addr global
38+
// CHECK-DAG: @H = {{(dso_local )?}}local_unnamed_addr global
3939
#[no_mangle]
4040
pub static mut H: u8 = 0;
4141
}
4242

4343
const HIDDEN: () = {
44-
// CHECK: @I = {{(dso_local )?}}local_unnamed_addr constant
44+
// CHECK-DAG: @I = {{(dso_local )?}}local_unnamed_addr constant
4545
#[no_mangle]
4646
static I: u8 = 0;
4747

48-
// CHECK: @J = {{(dso_local )?}}local_unnamed_addr global
48+
// CHECK-DAG: @J = {{(dso_local )?}}local_unnamed_addr global
4949
#[no_mangle]
5050
static mut J: u8 = 0;
5151

52-
// CHECK: @K = {{(dso_local )?}}local_unnamed_addr constant
52+
// CHECK-DAG: @K = {{(dso_local )?}}local_unnamed_addr constant
5353
#[no_mangle]
5454
pub static K: u8 = 0;
5555

56-
// CHECK: @L = {{(dso_local )?}}local_unnamed_addr global
56+
// CHECK-DAG: @L = {{(dso_local )?}}local_unnamed_addr global
5757
#[no_mangle]
5858
pub static mut L: u8 = 0;
5959
};
6060

6161
fn x() {
62-
// CHECK: @M = {{(dso_local )?}}local_unnamed_addr constant
62+
// CHECK-DAG: @M = {{(dso_local )?}}local_unnamed_addr constant
6363
#[no_mangle]
6464
static M: fn() = x;
6565

66-
// CHECK: @N = {{(dso_local )?}}local_unnamed_addr global
66+
// CHECK-DAG: @N = {{(dso_local )?}}local_unnamed_addr global
6767
#[no_mangle]
6868
static mut N: u8 = 0;
6969

70-
// CHECK: @O = {{(dso_local )?}}local_unnamed_addr constant
70+
// CHECK-DAG: @O = {{(dso_local )?}}local_unnamed_addr constant
7171
#[no_mangle]
7272
pub static O: u8 = 0;
7373

74-
// CHECK: @P = {{(dso_local )?}}local_unnamed_addr global
74+
// CHECK-DAG: @P = {{(dso_local )?}}local_unnamed_addr global
7575
#[no_mangle]
7676
pub static mut P: u8 = 0;
7777
}

‎tests/codegen/link_section.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
#![crate_type = "lib"]
55

6-
// CHECK: @VAR1 = {{(dso_local )?}}constant <{ [4 x i8] }> <{ [4 x i8] c"\01\00\00\00" }>, section ".test_one"
6+
// CHECK: @VAR1 = {{(dso_local )?}}constant [4 x i8] c"\01\00\00\00", section ".test_one"
77
#[no_mangle]
88
#[link_section = ".test_one"]
99
#[cfg(target_endian = "little")]

‎tests/codegen/remap_path_prefix/main.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ mod aux_mod;
1212
include!("aux_mod.rs");
1313

1414
// Here we check that the expansion of the file!() macro is mapped.
15-
// CHECK: @alloc_5761061597a97f66e13ef2ff92712c4b = private unnamed_addr constant <{ [34 x i8] }> <{ [34 x i8] c"/the/src/remap_path_prefix/main.rs" }>
15+
// CHECK: @alloc_4079a2e7607f89f86df6b8a72ba0dd06 = private unnamed_addr constant [35 x i8] c"/the/src/remap_path_prefix/main.rs\00"
1616
pub static FILE_PATH: &'static str = file!();
1717

1818
fn main() {

‎tests/codegen/uninit-consts.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,15 @@ pub struct PartiallyUninit {
1111
y: MaybeUninit<[u8; 10]>,
1212
}
1313

14-
// CHECK: [[FULLY_UNINIT:@.*]] = private unnamed_addr constant <{ [10 x i8] }> undef
14+
// CHECK: [[FULLY_UNINIT:@.*]] = private unnamed_addr constant [10 x i8] undef
1515

1616
// CHECK: [[PARTIALLY_UNINIT:@.*]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"{{\\EF\\BE\\AD\\DE|\\DE\\AD\\BE\\EF}}", [12 x i8] undef }>, align 4
1717

1818
// This shouldn't contain undef, since it contains more chunks
1919
// than the default value of uninit_const_chunk_threshold.
20-
// CHECK: [[UNINIT_PADDING_HUGE:@.*]] = private unnamed_addr constant <{ [32768 x i8] }> <{ [32768 x i8] c"{{.+}}" }>, align 4
20+
// CHECK: [[UNINIT_PADDING_HUGE:@.*]] = private unnamed_addr constant [32768 x i8] c"{{.+}}", align 4
2121

22-
// CHECK: [[FULLY_UNINIT_HUGE:@.*]] = private unnamed_addr constant <{ [16384 x i8] }> undef
22+
// CHECK: [[FULLY_UNINIT_HUGE:@.*]] = private unnamed_addr constant [16384 x i8] undef
2323

2424
// CHECK-LABEL: @fully_uninit
2525
#[no_mangle]

‎tests/mir-opt/const_allocation.main.GVN.after.32bit.mir

+10-10
Original file line numberDiff line numberDiff line change
@@ -33,27 +33,27 @@ ALLOC2 (size: 16, align: 4) {
3333
╾ALLOC4<imm>╼ 03 00 00 00 ╾ALLOC5<imm>╼ 03 00 00 00 │ ╾──╼....╾──╼....
3434
}
3535

36-
ALLOC4 (size: 3, align: 1) {
37-
66 6f 6f │ foo
36+
ALLOC4 (size: 4, align: 1) {
37+
66 6f 6f 00 │ foo.
3838
}
3939

40-
ALLOC5 (size: 3, align: 1) {
41-
62 61 72 │ bar
40+
ALLOC5 (size: 4, align: 1) {
41+
62 61 72 00 │ bar.
4242
}
4343

4444
ALLOC3 (size: 24, align: 4) {
4545
0x00 │ ╾ALLOC6<imm>╼ 03 00 00 00 ╾ALLOC7<imm>╼ 03 00 00 00 │ ╾──╼....╾──╼....
4646
0x10 │ ╾ALLOC8<imm>╼ 04 00 00 00 │ ╾──╼....
4747
}
4848

49-
ALLOC6 (size: 3, align: 1) {
50-
6d 65 68 │ meh
49+
ALLOC6 (size: 4, align: 1) {
50+
6d 65 68 00 │ meh.
5151
}
5252

53-
ALLOC7 (size: 3, align: 1) {
54-
6d 6f 70 │ mop
53+
ALLOC7 (size: 4, align: 1) {
54+
6d 6f 70 00 │ mop.
5555
}
5656

57-
ALLOC8 (size: 4, align: 1) {
58-
6d c3 b6 70 │ m..p
57+
ALLOC8 (size: 5, align: 1) {
58+
6d c3 b6 70 00 │ m..p.
5959
}

‎tests/mir-opt/const_allocation.main.GVN.after.64bit.mir

+10-10
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,12 @@ ALLOC2 (size: 32, align: 8) {
3636
0x10 │ ╾ALLOC5<imm>╼ 03 00 00 00 00 00 00 00 │ ╾──────╼........
3737
}
3838

39-
ALLOC4 (size: 3, align: 1) {
40-
66 6f 6f │ foo
39+
ALLOC4 (size: 4, align: 1) {
40+
66 6f 6f 00 │ foo.
4141
}
4242

43-
ALLOC5 (size: 3, align: 1) {
44-
62 61 72 │ bar
43+
ALLOC5 (size: 4, align: 1) {
44+
62 61 72 00 │ bar.
4545
}
4646

4747
ALLOC3 (size: 48, align: 8) {
@@ -50,14 +50,14 @@ ALLOC3 (size: 48, align: 8) {
5050
0x20 │ ╾ALLOC8<imm>╼ 04 00 00 00 00 00 00 00 │ ╾──────╼........
5151
}
5252

53-
ALLOC6 (size: 3, align: 1) {
54-
6d 65 68 │ meh
53+
ALLOC6 (size: 4, align: 1) {
54+
6d 65 68 00 │ meh.
5555
}
5656

57-
ALLOC7 (size: 3, align: 1) {
58-
6d 6f 70 │ mop
57+
ALLOC7 (size: 4, align: 1) {
58+
6d 6f 70 00 │ mop.
5959
}
6060

61-
ALLOC8 (size: 4, align: 1) {
62-
6d c3 b6 70 │ m..p
61+
ALLOC8 (size: 5, align: 1) {
62+
6d c3 b6 70 00 │ m..p.
6363
}

0 commit comments

Comments
 (0)
Failed to load comments.