Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 69c35dc

Browse files
committedFeb 1, 2025
Auto merge of #136410 - saethlin:clean-up-cgu-internal-copy, r=<try>
Remove InstanceKind::generates_cgu_internal_copy This is on top of #136394 for now. `InstanceKind::generates_cgu_internal_copy` was called in two places: 1. By `reachable_non_generics`, but in that case the caller was guaranteed to not be generic and therefore we always fall through to just calling `cross_crate_inlinable`. 2. By `MonoItem::instantiation_mode`, but in that case the *only* thing that it does is some very complicated logic for selecting the instantiation mode for drop glue, and only very recently do we have a single codegen test for this. So I've touched up the logic in `cross_crate_inlinable` so that we don't try to claim that `drop_in_place` isn't cross-crate-inlinable, because it clearly is. Now off to perf, and if we get regressions I'll start reintroducing the logic about drop glue but, but this time into `MonoItem::instantiation_mode` where it always should have been.
2 parents 8239a37 + 6e9db01 commit 69c35dc

File tree

4 files changed

+80
-96
lines changed

4 files changed

+80
-96
lines changed
 

‎compiler/rustc_codegen_ssa/src/back/symbol_export.rs

+1-10
Original file line numberDiff line numberDiff line change
@@ -93,16 +93,7 @@ fn reachable_non_generics_provider(tcx: TyCtxt<'_>, _: LocalCrate) -> DefIdMap<S
9393
return None;
9494
}
9595

96-
// Functions marked with #[inline] are codegened with "internal"
97-
// linkage and are not exported unless marked with an extern
98-
// indicator
99-
if !Instance::mono(tcx, def_id.to_def_id()).def.generates_cgu_internal_copy(tcx)
100-
|| tcx.codegen_fn_attrs(def_id.to_def_id()).contains_extern_indicator()
101-
{
102-
Some(def_id)
103-
} else {
104-
None
105-
}
96+
if tcx.cross_crate_inlinable(def_id) { None } else { Some(def_id) }
10697
})
10798
.map(|def_id| {
10899
// We won't link right if this symbol is stripped during LTO.

‎compiler/rustc_middle/src/mir/mono.rs

+73-42
Original file line numberDiff line numberDiff line change
@@ -92,51 +92,82 @@ impl<'tcx> MonoItem<'tcx> {
9292
}
9393

9494
pub fn instantiation_mode(&self, tcx: TyCtxt<'tcx>) -> InstantiationMode {
95-
let generate_cgu_internal_copies =
96-
(tcx.sess.opts.optimize != OptLevel::No) && !tcx.sess.link_dead_code();
95+
// The case handling here is written in the same style as cross_crate_inlinable, we first
96+
// handle the cases where we must use a particular instantiation mode, then cascade down
97+
// through a sequence of heuristics.
9798

98-
match *self {
99-
MonoItem::Fn(ref instance) => {
100-
let entry_def_id = tcx.entry_fn(()).map(|(id, _)| id);
101-
// If this function isn't inlined or otherwise has an extern
102-
// indicator, then we'll be creating a globally shared version.
103-
let codegen_fn_attrs = tcx.codegen_fn_attrs(instance.def_id());
104-
if codegen_fn_attrs.contains_extern_indicator()
105-
|| codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED)
106-
|| !instance.def.generates_cgu_internal_copy(tcx)
107-
|| Some(instance.def_id()) == entry_def_id
108-
{
109-
return InstantiationMode::GloballyShared { may_conflict: false };
110-
}
111-
112-
if let InlineAttr::Never = tcx.codegen_fn_attrs(instance.def_id()).inline
113-
&& self.is_generic_fn()
114-
{
115-
// Upgrade inline(never) to a globally shared instance.
116-
return InstantiationMode::GloballyShared { may_conflict: true };
117-
}
118-
119-
// At this point we don't have explicit linkage and we're an
120-
// inlined function. If this crate's build settings permit,
121-
// we'll be creating a local copy per CGU.
122-
if generate_cgu_internal_copies {
123-
return InstantiationMode::LocalCopy;
124-
}
125-
126-
// Finally, if this is `#[inline(always)]` we're sure to respect
127-
// that with an inline copy per CGU, but otherwise we'll be
128-
// creating one copy of this `#[inline]` function which may
129-
// conflict with upstream crates as it could be an exported
130-
// symbol.
131-
if tcx.codegen_fn_attrs(instance.def_id()).inline.always() {
132-
InstantiationMode::LocalCopy
133-
} else {
134-
InstantiationMode::GloballyShared { may_conflict: true }
135-
}
136-
}
99+
// The first thing we do is detect MonoItems which we must instantiate exactly once in the
100+
// whole program.
101+
102+
// Statics and global_asm! must be instantiated exactly once.
103+
let instance = match *self {
104+
MonoItem::Fn(instance) => instance,
137105
MonoItem::Static(..) | MonoItem::GlobalAsm(..) => {
138-
InstantiationMode::GloballyShared { may_conflict: false }
106+
return InstantiationMode::GloballyShared { may_conflict: false };
139107
}
108+
};
109+
110+
// Similarly, the executable entrypoint must be instantiated exactly once.
111+
if let Some((entry_def_id, _)) = tcx.entry_fn(()) {
112+
if instance.def_id() == entry_def_id {
113+
return InstantiationMode::GloballyShared { may_conflict: false };
114+
}
115+
}
116+
117+
// If the function is #[naked] or contains any other attribute that requires exactly-once
118+
// instantiation:
119+
let codegen_fn_attrs = tcx.codegen_fn_attrs(instance.def_id());
120+
if codegen_fn_attrs.contains_extern_indicator()
121+
|| codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED)
122+
{
123+
return InstantiationMode::GloballyShared { may_conflict: false };
124+
}
125+
126+
// We need to ensure that we do not decide the InstantiationMode of an exported symbol is
127+
// LocalCopy. Since exported symbols are computed based on the output of
128+
// cross_crate_inlinable, we are beholden to our previous decisions.
129+
if !tcx.cross_crate_inlinable(instance.def_id()) {
130+
return InstantiationMode::GloballyShared { may_conflict: false };
131+
}
132+
133+
// Beginning of heuristics. The handling of link-dead-code and inline(always) are QoL only,
134+
// the compiler should not crash and linkage should work, but codegen may be undesirable.
135+
136+
// -Clink-dead-code was given an unfortunate name; the point of the flag is to assist
137+
// coverage tools which rely on having every function in the program appear in the
138+
// generated code. If we select LocalCopy, functions which are not used because they are
139+
// missing test coverage will disappear from such coverage reports, defeating the point.
140+
// Note that -Cinstrument-coverage does not require such assistance from us, only coverage
141+
// tools implemented without compiler support ironically require a special compiler flag.
142+
if tcx.sess.link_dead_code() {
143+
return InstantiationMode::GloballyShared { may_conflict: true };
144+
}
145+
146+
// To ensure that #[inline(always)] can be inlined as much as possible, especially in unoptimized
147+
// builds, we always select LocalCopy.
148+
if codegen_fn_attrs.inline.always() {
149+
return InstantiationMode::LocalCopy;
150+
}
151+
152+
// #[inline(never)] functions in general are poor candidates for inlining and thus since
153+
// LocalCopy generally increases code size for the benefit of optimizations from inlining,
154+
// we want to give them GloballyShared codegen.
155+
// The slight problem is that generic functions need to always support cross-crate
156+
// compilation, so all previous stages of the compiler are obligated to treat generic
157+
// functions the same as those that unconditionally get LocalCopy codegen. It's only when
158+
// we get here that we can at least not codegen a #[inline(never)] generic function in all
159+
// of our CGUs.
160+
if let InlineAttr::Never = tcx.codegen_fn_attrs(instance.def_id()).inline
161+
&& self.is_generic_fn()
162+
{
163+
return InstantiationMode::GloballyShared { may_conflict: true };
164+
}
165+
166+
// The fallthrough case is to generate LocalCopy for all optimized builds, and
167+
// GloballyShared with conflict prevention when optimizations are disabled.
168+
match tcx.sess.opts.optimize {
169+
OptLevel::No => InstantiationMode::GloballyShared { may_conflict: true },
170+
_ => InstantiationMode::LocalCopy,
140171
}
141172
}
142173

‎compiler/rustc_middle/src/ty/instance.rs

-44
Original file line numberDiff line numberDiff line change
@@ -301,50 +301,6 @@ impl<'tcx> InstanceKind<'tcx> {
301301
)
302302
}
303303

304-
/// Returns `true` if the machine code for this instance is instantiated in
305-
/// each codegen unit that references it.
306-
/// Note that this is only a hint! The compiler can globally decide to *not*
307-
/// do this in order to speed up compilation. CGU-internal copies are
308-
/// only exist to enable inlining. If inlining is not performed (e.g. at
309-
/// `-Copt-level=0`) then the time for generating them is wasted and it's
310-
/// better to create a single copy with external linkage.
311-
pub fn generates_cgu_internal_copy(&self, tcx: TyCtxt<'tcx>) -> bool {
312-
if self.requires_inline(tcx) {
313-
return true;
314-
}
315-
if let ty::InstanceKind::DropGlue(.., Some(ty))
316-
| ty::InstanceKind::AsyncDropGlueCtorShim(.., Some(ty)) = *self
317-
{
318-
// Drop glue generally wants to be instantiated at every codegen
319-
// unit, but without an #[inline] hint. We should make this
320-
// available to normal end-users.
321-
if tcx.sess.opts.incremental.is_none() {
322-
return true;
323-
}
324-
// When compiling with incremental, we can generate a *lot* of
325-
// codegen units. Including drop glue into all of them has a
326-
// considerable compile time cost.
327-
//
328-
// We include enums without destructors to allow, say, optimizing
329-
// drops of `Option::None` before LTO. We also respect the intent of
330-
// `#[inline]` on `Drop::drop` implementations.
331-
return ty.ty_adt_def().is_none_or(|adt_def| {
332-
match *self {
333-
ty::InstanceKind::DropGlue(..) => adt_def.destructor(tcx).map(|dtor| dtor.did),
334-
ty::InstanceKind::AsyncDropGlueCtorShim(..) => {
335-
adt_def.async_destructor(tcx).map(|dtor| dtor.ctor)
336-
}
337-
_ => unreachable!(),
338-
}
339-
.map_or_else(|| adt_def.is_enum(), |did| tcx.cross_crate_inlinable(did))
340-
});
341-
}
342-
if let ty::InstanceKind::ThreadLocalShim(..) = *self {
343-
return false;
344-
}
345-
tcx.cross_crate_inlinable(self.def_id())
346-
}
347-
348304
pub fn requires_caller_location(&self, tcx: TyCtxt<'_>) -> bool {
349305
match *self {
350306
InstanceKind::Item(def_id) | InstanceKind::Virtual(def_id, _) => {

‎compiler/rustc_mir_transform/src/cross_crate_inline.rs

+6
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ fn cross_crate_inlinable(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool {
7575
return false;
7676
}
7777

78+
if let Some(drop_in_place) = tcx.lang_items().drop_in_place_fn() {
79+
if rustc_hir::def_id::DefId::from(def_id) == drop_in_place {
80+
return true;
81+
}
82+
}
83+
7884
if !tcx.is_mir_available(def_id) {
7985
return false;
8086
}

0 commit comments

Comments
 (0)
Failed to load comments.