@@ -477,8 +477,8 @@ fn remove_cycle(
 /// Detects query cycles by using depth first search over all active query jobs.
 /// If a query cycle is found it will break the cycle by finding an edge which
 /// uses a query latch and then resuming that waiter.
-/// There may be multiple cycles involved in a deadlock, so this searches
-/// all active queries for cycles before finally resuming all the waiters at once.
+/// There may be multiple cycles involved in a deadlock, but we only search for
+/// one cycle per call and resume one waiter at a time. See the `FIXME` below.
 pub fn break_query_cycles(query_map: QueryMap, registry: &rayon_core::Registry) {
     let mut wakelist = Vec::new();
     let mut jobs: Vec<QueryJobId> = query_map.keys().cloned().collect();
@@ -488,6 +488,19 @@ pub fn break_query_cycles(query_map: QueryMap, registry: &rayon_core::Registry)
     while jobs.len() > 0 {
         if remove_cycle(&query_map, &mut jobs, &mut wakelist) {
             found_cycle = true;
+
+            // FIXME(#137731): Resuming all the waiters at once may cause deadlocks,
+            // so we resume one waiter per call for now. It's still unclear whether
+            // this is due to possible issues in rustc-rayon or instead in the
+            // handling of query cycles.
+            // This seems to only appear when multiple query cycle errors
+            // are involved, so this reduction in parallelism, while suboptimal, is
+            // not universal, and only the deadlock handler will encounter these cases.
+            // The workaround gives up some potential gains, but there are still big
+            // improvements in the common case, and no regressions compared to the
+            // single-threaded case. More investigation is still needed; once this is
+            // fixed, we can wake up all the waiters at once.
+            break;
         }
     }
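To illustrate the control flow this patch produces, here is a minimal, self-contained sketch of the "stop after the first cycle, resume one waiter" loop. The stub `QueryJobId`, the wait-edge map, and the toy `remove_cycle` below are hypothetical stand-ins for rustc's internals (the real `remove_cycle` walks active query jobs and their latches), not the actual implementation:

```rust
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct QueryJobId(usize);

/// Toy stand-in for rustc's `remove_cycle`: follows wait-edges from each
/// unvisited job and, if it finds a cycle, records one waiter to resume.
fn remove_cycle(
    waits_on: &HashMap<QueryJobId, QueryJobId>,
    jobs: &mut Vec<QueryJobId>,
    wakelist: &mut Vec<QueryJobId>,
) -> bool {
    while let Some(start) = jobs.pop() {
        let mut seen = vec![start];
        let mut cur = start;
        while let Some(&next) = waits_on.get(&cur) {
            if seen.contains(&next) {
                wakelist.push(next); // the waiter chosen to break the cycle
                return true;
            }
            seen.push(next);
            cur = next;
        }
    }
    false
}

/// Mirrors the patched control flow: stop after the first cycle found and
/// resume only that one waiter, instead of collecting all of them first.
fn break_query_cycles(waits_on: &HashMap<QueryJobId, QueryJobId>) {
    let mut wakelist = Vec::new();
    let mut jobs: Vec<QueryJobId> = waits_on.keys().copied().collect();
    let mut found_cycle = false;
    while !jobs.is_empty() {
        if remove_cycle(waits_on, &mut jobs, &mut wakelist) {
            found_cycle = true;
            break; // one waiter per call, as in the FIXME above
        }
    }
    println!("found_cycle = {found_cycle}, resuming {wakelist:?}");
}

fn main() {
    // Three jobs deadlocked in a cycle: 0 waits on 1, 1 on 2, 2 on 0.
    let waits_on: HashMap<_, _> = [
        (QueryJobId(0), QueryJobId(1)),
        (QueryJobId(1), QueryJobId(2)),
        (QueryJobId(2), QueryJobId(0)),
    ]
    .into_iter()
    .collect();
    break_query_cycles(&waits_on);
}
```

In this shape, a deadlock involving several independent cycles is resolved one cycle per invocation of the deadlock handler rather than all at once, which is the reduction in parallelism the FIXME describes.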