Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit b75b67f

Browse files
committed Feb 19, 2025
Add a .bss-like scheme for encoded const allocs
1 parent 3b022d8 commit b75b67f

File tree

2 files changed

+117
-4
lines changed

2 files changed

+117
-4
lines changed
 

‎compiler/rustc_abi/src/lib.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -791,7 +791,7 @@ impl Align {
791791
}
792792

793793
#[inline]
794-
pub fn bytes(self) -> u64 {
794+
pub const fn bytes(self) -> u64 {
795795
1 << self.pow2
796796
}
797797

@@ -801,7 +801,7 @@ impl Align {
801801
}
802802

803803
#[inline]
804-
pub fn bits(self) -> u64 {
804+
pub const fn bits(self) -> u64 {
805805
self.bytes() * 8
806806
}
807807

‎compiler/rustc_middle/src/mir/interpret/allocation.rs

+115-2
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,9 @@ use provenance_map::*;
1515
use rustc_abi::{Align, HasDataLayout, Size};
1616
use rustc_ast::Mutability;
1717
use rustc_data_structures::intern::Interned;
18-
use rustc_macros::{HashStable, TyDecodable, TyEncodable};
18+
use rustc_macros::HashStable;
19+
use rustc_serialize::{Decodable, Encodable};
20+
use rustc_type_ir::{TyDecoder, TyEncoder};
1921

2022
use super::{
2123
AllocId, BadBytesAccess, CtfeProvenance, InterpErrorKind, InterpResult, Pointer,
@@ -77,7 +79,7 @@ impl AllocBytes for Box<[u8]> {
7779
/// module provides higher-level access.
7880
// Note: for performance reasons when interning, some of the `Allocation` fields can be partially
7981
// hashed. (see the `Hash` impl below for more details), so the impl is not derived.
80-
#[derive(Clone, Eq, PartialEq, TyEncodable, TyDecodable)]
82+
#[derive(Clone, Eq, PartialEq)]
8183
#[derive(HashStable)]
8284
pub struct Allocation<Prov: Provenance = CtfeProvenance, Extra = (), Bytes = Box<[u8]>> {
8385
/// The actual bytes of the allocation.
@@ -101,6 +103,117 @@ pub struct Allocation<Prov: Provenance = CtfeProvenance, Extra = (), Bytes = Box
101103
pub extra: Extra,
102104
}
103105

106+
/// Helper struct that packs an alignment, mutability, and "all bytes are zero" flag together.
107+
///
108+
/// Alignment values always have 2 free high bits, and we check for this in our [`Encodable`] impl.
109+
struct AllocFlags {
110+
align: Align,
111+
mutability: Mutability,
112+
all_zero: bool,
113+
}
114+
115+
impl<E: TyEncoder> Encodable<E> for AllocFlags {
116+
fn encode(&self, encoder: &mut E) {
117+
// Make sure Align::MAX can be stored with the high 2 bits unset.
118+
const {
119+
let max_supported_align_repr = u8::MAX >> 2;
120+
let max_supported_align = 1 << max_supported_align_repr;
121+
assert!(Align::MAX.bytes() <= max_supported_align)
122+
}
123+
124+
let mut flags = self.align.bytes().trailing_zeros() as u8;
125+
flags |= match self.mutability {
126+
Mutability::Not => 0,
127+
Mutability::Mut => 1 << 6,
128+
};
129+
flags |= (self.all_zero as u8) << 7;
130+
flags.encode(encoder);
131+
}
132+
}
133+
134+
impl<D: TyDecoder> Decodable<D> for AllocFlags {
135+
fn decode(decoder: &mut D) -> Self {
136+
let flags: u8 = Decodable::decode(decoder);
137+
let align = flags & 0b0011_1111;
138+
let mutability = flags & 0b0100_0000;
139+
let all_zero = flags & 0b1000_0000;
140+
141+
let align = Align::from_bytes(1 << align).unwrap();
142+
let mutability = match mutability {
143+
0 => Mutability::Not,
144+
_ => Mutability::Mut,
145+
};
146+
let all_zero = all_zero > 0;
147+
148+
AllocFlags { align, mutability, all_zero }
149+
}
150+
}
151+
/// Efficiently detect whether a slice of `u8` is all zero.
///
/// This is used in encoding of [`Allocation`] to special-case all-zero allocations. It is only
/// optimized a little, because for many allocations the encoding of the actual bytes does not
/// dominate runtime.
///
/// Returns `true` for an empty slice.
#[inline]
fn all_zero(buf: &[u8]) -> bool {
    match buf.first() {
        // In the empty case we wouldn't encode any contents even without this system where we
        // special-case allocations whose contents are all 0. We can return anything here.
        None => true,
        // Just fast-rejecting based on the first element significantly reduces the amount that
        // we end up walking the whole array.
        Some(&first) if first != 0 => false,
        // This strategy of combining all slice elements with & or | is unbeatable for the large
        // all-zero case because it is so well-understood by autovectorization. The fold is
        // deliberately not short-circuiting.
        Some(_) => buf.iter().fold(true, |acc, b| acc & (*b == 0)),
    }
}
174+
175+
/// Custom encoder for [`Allocation`] to more efficiently represent the case where all bytes are 0.
176+
impl<Prov: Provenance, Extra, Bytes, E: TyEncoder> Encodable<E> for Allocation<Prov, Extra, Bytes>
177+
where
178+
Bytes: AllocBytes,
179+
ProvenanceMap<Prov>: Encodable<E>,
180+
Extra: Encodable<E>,
181+
{
182+
fn encode(&self, encoder: &mut E) {
183+
let all_zero = all_zero(&self.bytes);
184+
AllocFlags { align: self.align, mutability: self.mutability, all_zero }.encode(encoder);
185+
186+
encoder.emit_usize(self.bytes.len());
187+
if !all_zero {
188+
encoder.emit_raw_bytes(&self.bytes);
189+
}
190+
self.provenance.encode(encoder);
191+
self.init_mask.encode(encoder);
192+
self.extra.encode(encoder);
193+
}
194+
}
195+
196+
impl<Prov: Provenance, Extra, Bytes, D: TyDecoder> Decodable<D> for Allocation<Prov, Extra, Bytes>
197+
where
198+
Bytes: AllocBytes,
199+
ProvenanceMap<Prov>: Decodable<D>,
200+
Extra: Decodable<D>,
201+
{
202+
fn decode(decoder: &mut D) -> Self {
203+
let AllocFlags { align, mutability, all_zero } = Decodable::decode(decoder);
204+
205+
let len = decoder.read_usize();
206+
let bytes = if all_zero { vec![0u8; len] } else { decoder.read_raw_bytes(len).to_vec() };
207+
let bytes = Bytes::from_bytes(bytes, align);
208+
209+
let provenance = Decodable::decode(decoder);
210+
let init_mask = Decodable::decode(decoder);
211+
let extra = Decodable::decode(decoder);
212+
213+
Self { bytes, provenance, init_mask, align, mutability, extra }
214+
}
215+
}
216+
104217
/// This is the maximum size we will hash at a time, when interning an `Allocation` and its
105218
/// `InitMask`. Note, we hash that amount of bytes twice: at the start, and at the end of a buffer.
106219
/// Used when these two structures are large: we only partially hash the larger fields in that

0 commit comments

Comments
 (0)
Failed to load comments.