compiler/rustc_mir_transform/src/coverage/spans.rs - third_party/rust - Git at Google

 use std::collections::VecDeque;
 use std::iter;

 use rustc_data_structures::fx::FxHashSet;
 use rustc_middle::mir;
 use rustc_middle::ty::TyCtxt;
 use rustc_span::{DesugaringKind, ExpnKind, MacroKind, Span};
 use tracing::{debug, debug_span, instrument};

 use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph};
 use crate::coverage::spans::from_mir::{Hole, RawSpanFromMir, SpanFromMir};
 use crate::coverage::{ExtractedHirInfo, mappings, unexpand};

 mod from_mir;

 pub(super) fn extract_refined_covspans<'tcx>(
     tcx: TyCtxt<'tcx>,
     mir_body: &mir::Body<'tcx>,
     hir_info: &ExtractedHirInfo,
     graph: &CoverageGraph,
     code_mappings: &mut impl Extend<mappings::CodeMapping>,
 ) {
     let &ExtractedHirInfo { body_span, .. } = hir_info;

     let raw_spans = from_mir::extract_raw_spans_from_mir(mir_body, graph);
     let mut covspans = raw_spans
         .into_iter()
         .filter_map(|RawSpanFromMir { raw_span, bcb }| try {
             let (span, expn_kind) =
                 unexpand::unexpand_into_body_span_with_expn_kind(raw_span, body_span)?;
             // Discard any spans that fill the entire body, because they tend
             // to represent compiler-inserted code, e.g. implicitly returning `()`.
             if span.source_equal(body_span) {
                 return None;
             };
             SpanFromMir { span, expn_kind, bcb }
         })
         .collect::<Vec<_>>();

     // Only proceed if we found at least one usable span.
     if covspans.is_empty() {
         return;
     }

     // Also add the adjusted function signature span, if available.
     // Otherwise, add a fake span at the start of the body, to avoid an ugly
     // gap between the start of the body and the first real span.
     // FIXME: Find a more principled way to solve this problem.
     covspans.push(SpanFromMir::for_fn_sig(
         hir_info.fn_sig_span_extended.unwrap_or_else(|| body_span.shrink_to_lo()),
     ));

     // First, perform the passes that need macro information.
     covspans.sort_by(|a, b| graph.cmp_in_dominator_order(a.bcb, b.bcb));
     remove_unwanted_expansion_spans(&mut covspans);
     shrink_visible_macro_spans(tcx, &mut covspans);

     // We no longer need the extra information in `SpanFromMir`, so convert to `Covspan`.
     let mut covspans = covspans.into_iter().map(SpanFromMir::into_covspan).collect::<Vec<_>>();

     let compare_covspans = |a: &Covspan, b: &Covspan| {
         compare_spans(a.span, b.span)
             // After deduplication, we want to keep only the most-dominated BCB.
             .then_with(|| graph.cmp_in_dominator_order(a.bcb, b.bcb).reverse())
     };
     covspans.sort_by(compare_covspans);

     // Among covspans with the same span, keep only one,
     // preferring the one with the most-dominated BCB.
     // (Ideally we should try to preserve _all_ non-dominating BCBs, but that
     // requires a lot more complexity in the span refiner, for little benefit.)
     covspans.dedup_by(|b, a| a.span.source_equal(b.span));

     // Sort the holes, and merge overlapping/adjacent holes.
     let mut holes = hir_info
         .hole_spans
         .iter()
         .copied()
         // Discard any holes that aren't directly visible within the body span.
         .filter(|&hole_span| body_span.contains(hole_span) && body_span.eq_ctxt(hole_span))
         .map(|span| Hole { span })
         .collect::<Vec<_>>();
     holes.sort_by(|a, b| compare_spans(a.span, b.span));
     holes.dedup_by(|b, a| a.merge_if_overlapping_or_adjacent(b));

     // Split the covspans into separate buckets that don't overlap any holes.
     let buckets = divide_spans_into_buckets(covspans, &holes);

     for covspans in buckets {
         let _span = debug_span!("processing bucket", ?covspans).entered();

         let mut covspans = remove_unwanted_overlapping_spans(covspans);
         debug!(?covspans, "after removing overlaps");

         // Do one last merge pass, to simplify the output.
         covspans.dedup_by(|b, a| a.merge_if_eligible(b));
         debug!(?covspans, "after merge");

         code_mappings.extend(covspans.into_iter().map(|Covspan { span, bcb }| {
             // Each span produced by the refiner represents an ordinary code region.
             mappings::CodeMapping { span, bcb }
         }));
     }
 }

 /// Drops repeated covspans that come from the same bang-macro invocation or
 /// the same `await` desugaring.
 ///
 /// Macros that expand into branches (e.g. `assert!`, `trace!`) tend to produce
 /// several condition/consequent blocks that all carry the span of the whole
 /// invocation, which is unhelpful; keeping only the first one seems to give
 /// better mappings. Likewise, `await` expands to a branch on the discriminant
 /// of `Poll`, which leads to incorrect coverage if the `Future` is immediately
 /// ready (#98712).
 ///
 /// (The input spans should be sorted in BCB dominator order, so that the
 /// retained "first" span is likely to dominate the others.)
 fn remove_unwanted_expansion_spans(covspans: &mut Vec<SpanFromMir>) {
     let mut seen_spans = FxHashSet::default();

     covspans.retain(|covspan| {
         let needs_dedup = matches!(
             covspan.expn_kind,
             Some(ExpnKind::Desugaring(DesugaringKind::Await))
                 | Some(ExpnKind::Macro(MacroKind::Bang, _))
         );
         // Other spans are always retained. For await/macro spans, `insert`
         // returns false once the span has been recorded, so only the first
         // occurrence is kept.
         !needs_dedup || seen_spans.insert(covspan.span)
     });
 }

 /// Truncates every covspan whose expansion kind is a bang macro so that it
 /// covers just the macro name plus `!`.
 ///
 /// This applies to macro invocations that are visible from the function
 /// body, and seems to give better results for code that uses macros.
 fn shrink_visible_macro_spans(tcx: TyCtxt<'_>, covspans: &mut Vec<SpanFromMir>) {
     let source_map = tcx.sess.source_map();

     covspans
         .iter_mut()
         .filter(|covspan| {
             matches!(covspan.expn_kind, Some(ExpnKind::Macro(MacroKind::Bang, _)))
         })
         .for_each(|covspan| {
             covspan.span = source_map.span_through_char(covspan.span, '!');
         });
 }

 /// Splits the covspans into buckets separated by the holes, such that:
 /// - No covspan placed in a bucket overlaps a hole (covspans that do are
 ///   discarded).
 /// - The spans in each bucket are strictly after all spans in previous buckets,
 ///   and strictly before all spans in subsequent buckets.
 ///
 /// Both `input_covspans` and `holes` must already be sorted.
 /// One bucket is produced per hole, plus a final bucket for whatever follows
 /// the last hole. The buckets are sorted relative to each other, and each
 /// bucket's contents are sorted.
 #[instrument(level = "debug")]
 fn divide_spans_into_buckets(input_covspans: Vec<Covspan>, holes: &[Hole]) -> Vec<Vec<Covspan>> {
     debug_assert!(input_covspans.is_sorted_by(|a, b| compare_spans(a.span, b.span).is_le()));
     debug_assert!(holes.is_sorted_by(|a, b| compare_spans(a.span, b.span).is_le()));

     // Covspans are consumed from the front as each hole is processed.
     let mut remaining = VecDeque::from(input_covspans);
     let mut buckets: Vec<Vec<Covspan>> = Vec::with_capacity(holes.len() + 1);

     for hole in holes {
         // Take every covspan that starts before the end of this hole, and
         // keep only those that do not overlap the hole itself.
         let bucket: Vec<Covspan> =
             drain_front_while(&mut remaining, |c| c.span.lo() < hole.span.hi())
                 .filter(|c| !c.span.overlaps(hole.span))
                 .collect();
         buckets.push(bucket);
     }

     // Whatever is left lies after the final hole. With no holes at all, this
     // is simply every input covspan.
     buckets.push(Vec::from(remaining));

     buckets
 }

 /// Similar to `.drain(..)`, but stops just before it would remove an item not
 /// satisfying the predicate.
 fn drain_front_while<'a, T>(
     queue: &'a mut VecDeque<T>,
     mut pred_fn: impl FnMut(&T) -> bool,
 ) -> impl Iterator<Item = T> {
     iter::from_fn(move || queue.pop_front_if(|x| pred_fn(x)))
 }

 /// Refines one bucket of (sorted) covspans by discarding spans that overlap
 /// in unwanted ways.
 ///
 /// A covspan that ends at or before the start of the current one is emitted
 /// as-is. A covspan that still overlaps the current one survives only if both
 /// have the same BCB.
 #[instrument(level = "debug")]
 fn remove_unwanted_overlapping_spans(sorted_spans: Vec<Covspan>) -> Vec<Covspan> {
     debug_assert!(sorted_spans.is_sorted_by(|a, b| compare_spans(a.span, b.span).is_le()));

     // `committed` is the output; `in_flight` holds covspans that have been
     // read from the input but might still be discarded by a later covspan.
     let mut committed: Vec<Covspan> = vec![];
     let mut in_flight: Vec<Covspan> = vec![];

     for curr in sorted_spans {
         in_flight.retain(|prev| {
             let ends_before_curr = prev.span.hi() <= curr.span.lo();
             if ends_before_curr {
                 // No overlap with the current covspan, so `prev` is final.
                 committed.push(prev.clone());
             }
             // An overlapping `prev` stays in flight only when it has the same
             // BCB. This tends to discard long outer spans that enclose
             // smaller inner spans with different control flow.
             !ends_before_curr && prev.bcb == curr.bcb
         });
         in_flight.push(curr);
     }

     // Anything still in flight at the end is kept.
     committed.extend(in_flight);
     committed
 }

 /// A source span paired with the basic coverage block (BCB) it is attributed to.
 #[derive(Debug, Clone)]
 struct Covspan {
     /// The source region covered by this covspan.
     span: Span,
     /// The BCB associated with `span`.
     bcb: BasicCoverageBlock,
 }

 impl Covspan {
     /// Merges `other` into `self` when both have the same BCB.
     ///
     /// On success `self.span` is widened to also include `other.span`, and
     /// `true` is returned; otherwise `self` is left untouched and `false` is
     /// returned.
     ///
     /// Note that compatible covspans are merged even if their underlying
     /// spans are not overlapping/adjacent; any space between them becomes
     /// part of the merged covspan.
     fn merge_if_eligible(&mut self, other: &Self) -> bool {
         let same_bcb = self.bcb == other.bcb;
         if same_bcb {
             self.span = self.span.to(other.span);
         }
         same_bcb
     }
 }

 /// Orders two spans by start position ascending, breaking ties by end
 /// position descending.
 ///
 /// This ensures that if spans A and B are adjacent in a sorted list, and they
 /// overlap but are not equal, then either:
 /// - Span A extends further left, or
 /// - Both have the same start and span A extends further right
 fn compare_spans(a: Span, b: Span) -> std::cmp::Ordering {
     // Comparing `b.hi()` against `a.hi()` (operands swapped) yields the
     // descending order for the tie-break.
     a.lo().cmp(&b.lo()).then_with(|| b.hi().cmp(&a.hi()))
 }
	use std::collections::VecDeque;
	use std::iter;

	use rustc_data_structures::fx::FxHashSet;
	use rustc_middle::mir;
	use rustc_middle::ty::TyCtxt;
	use rustc_span::{DesugaringKind, ExpnKind, MacroKind, Span};
	use tracing::{debug, debug_span, instrument};

	use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph};
	use crate::coverage::spans::from_mir::{Hole, RawSpanFromMir, SpanFromMir};
	use crate::coverage::{ExtractedHirInfo, mappings, unexpand};

	mod from_mir;

	pub(super) fn extract_refined_covspans<'tcx>(
	tcx: TyCtxt<'tcx>,
	mir_body: &mir::Body<'tcx>,
	hir_info: &ExtractedHirInfo,
	graph: &CoverageGraph,
	code_mappings: &mut impl Extend<mappings::CodeMapping>,
	) {
	let &ExtractedHirInfo { body_span, .. } = hir_info;

	let raw_spans = from_mir::extract_raw_spans_from_mir(mir_body, graph);
	let mut covspans = raw_spans
	.into_iter()
	.filter_map(\|RawSpanFromMir { raw_span, bcb }\| try {
	let (span, expn_kind) =
	unexpand::unexpand_into_body_span_with_expn_kind(raw_span, body_span)?;
	// Discard any spans that fill the entire body, because they tend
	// to represent compiler-inserted code, e.g. implicitly returning `()`.
	if span.source_equal(body_span) {
	return None;
	};
	SpanFromMir { span, expn_kind, bcb }
	})
	.collect::<Vec<_>>();

	// Only proceed if we found at least one usable span.
	if covspans.is_empty() {
	return;
	}

	// Also add the adjusted function signature span, if available.
	// Otherwise, add a fake span at the start of the body, to avoid an ugly
	// gap between the start of the body and the first real span.
	// FIXME: Find a more principled way to solve this problem.
	covspans.push(SpanFromMir::for_fn_sig(
	hir_info.fn_sig_span_extended.unwrap_or_else(\|\| body_span.shrink_to_lo()),
	));

	// First, perform the passes that need macro information.
	covspans.sort_by(\|a, b\| graph.cmp_in_dominator_order(a.bcb, b.bcb));
	remove_unwanted_expansion_spans(&mut covspans);
	shrink_visible_macro_spans(tcx, &mut covspans);

	// We no longer need the extra information in `SpanFromMir`, so convert to `Covspan`.
	let mut covspans = covspans.into_iter().map(SpanFromMir::into_covspan).collect::<Vec<_>>();

	let compare_covspans = \|a: &Covspan, b: &Covspan\| {
	compare_spans(a.span, b.span)
	// After deduplication, we want to keep only the most-dominated BCB.
	.then_with(\|\| graph.cmp_in_dominator_order(a.bcb, b.bcb).reverse())
	};
	covspans.sort_by(compare_covspans);

	// Among covspans with the same span, keep only one,
	// preferring the one with the most-dominated BCB.
	// (Ideally we should try to preserve _all_ non-dominating BCBs, but that
	// requires a lot more complexity in the span refiner, for little benefit.)
	covspans.dedup_by(\|b, a\| a.span.source_equal(b.span));

	// Sort the holes, and merge overlapping/adjacent holes.
	let mut holes = hir_info
	.hole_spans
	.iter()
	.copied()
	// Discard any holes that aren't directly visible within the body span.
	.filter(\|&hole_span\| body_span.contains(hole_span) && body_span.eq_ctxt(hole_span))
	.map(\|span\| Hole { span })
	.collect::<Vec<_>>();
	holes.sort_by(\|a, b\| compare_spans(a.span, b.span));
	holes.dedup_by(\|b, a\| a.merge_if_overlapping_or_adjacent(b));

	// Split the covspans into separate buckets that don't overlap any holes.
	let buckets = divide_spans_into_buckets(covspans, &holes);

	for covspans in buckets {
	let _span = debug_span!("processing bucket", ?covspans).entered();

	let mut covspans = remove_unwanted_overlapping_spans(covspans);
	debug!(?covspans, "after removing overlaps");

	// Do one last merge pass, to simplify the output.
	covspans.dedup_by(\|b, a\| a.merge_if_eligible(b));
	debug!(?covspans, "after merge");

	code_mappings.extend(covspans.into_iter().map(\|Covspan { span, bcb }\| {
	// Each span produced by the refiner represents an ordinary code region.
	mappings::CodeMapping { span, bcb }
	}));
	}
	}

	/// Macros that expand into branches (e.g. `assert!`, `trace!`) tend to generate
	/// multiple condition/consequent blocks that have the span of the whole macro
	/// invocation, which is unhelpful. Keeping only the first such span seems to
	/// give better mappings, so remove the others.
	///
	/// Similarly, `await` expands to a branch on the discriminant of `Poll`, which
	/// leads to incorrect coverage if the `Future` is immediately ready (#98712).
	///
	/// (The input spans should be sorted in BCB dominator order, so that the
	/// retained "first" span is likely to dominate the others.)
	fn remove_unwanted_expansion_spans(covspans: &mut Vec<SpanFromMir>) {
	    // Spans already seen for an await-related or macro-expanded covspan.
	    let mut deduplicated_spans = FxHashSet::default();

	    covspans.retain(|covspan| {
	        match covspan.expn_kind {
	            // Retain only the first await-related or macro-expanded covspan with this span.
	            // (`insert` returns false when the span was already recorded.)
	            Some(ExpnKind::Desugaring(DesugaringKind::Await)) => {
	                deduplicated_spans.insert(covspan.span)
	            }
	            Some(ExpnKind::Macro(MacroKind::Bang, _)) => deduplicated_spans.insert(covspan.span),
	            // Ignore (retain) other spans.
	            _ => true,
	        }
	    });
	}

	/// Truncates every covspan whose expansion kind is a bang macro so that it
	/// covers just the macro name plus `!`.
	///
	/// This applies to macro invocations that are visible from the function
	/// body, and seems to give better results for code that uses macros.
	fn shrink_visible_macro_spans(tcx: TyCtxt<'_>, covspans: &mut Vec<SpanFromMir>) {
	    let source_map = tcx.sess.source_map();

	    covspans
	        .iter_mut()
	        .filter(|covspan| {
	            matches!(covspan.expn_kind, Some(ExpnKind::Macro(MacroKind::Bang, _)))
	        })
	        .for_each(|covspan| {
	            covspan.span = source_map.span_through_char(covspan.span, '!');
	        });
	}

	/// Uses the holes to divide the given covspans into buckets, such that:
	/// - No covspan placed in a bucket overlaps a hole (covspans that do are
	///   discarded).
	/// - The spans in each bucket are strictly after all spans in previous buckets,
	///   and strictly before all spans in subsequent buckets.
	///
	/// The lists of covspans and holes must be sorted.
	/// One bucket is produced per hole, plus a final bucket for the covspans left
	/// after the last hole. The resulting buckets are sorted relative to each
	/// other, and each bucket's contents are sorted.
	#[instrument(level = "debug")]
	fn divide_spans_into_buckets(input_covspans: Vec<Covspan>, holes: &[Hole]) -> Vec<Vec<Covspan>> {
	    debug_assert!(input_covspans.is_sorted_by(|a, b| compare_spans(a.span, b.span).is_le()));
	    debug_assert!(holes.is_sorted_by(|a, b| compare_spans(a.span, b.span).is_le()));

	    // Now we're ready to start grouping spans into buckets separated by holes.

	    let mut input_covspans = VecDeque::from(input_covspans);

	    // For each hole:
	    // - Identify the spans that are entirely or partly before the hole.
	    // - Discard any that overlap with the hole.
	    // - Add the remaining identified spans to the corresponding bucket.
	    let mut buckets = (0..holes.len()).map(|_| vec![]).collect::<Vec<_>>();
	    for (hole, bucket) in holes.iter().zip(&mut buckets) {
	        bucket.extend(
	            drain_front_while(&mut input_covspans, |c| c.span.lo() < hole.span.hi())
	                .filter(|c| !c.span.overlaps(hole.span)),
	        );
	    }

	    // Any remaining spans form their own final bucket, after the final hole.
	    // (If there were no holes, this will just be all of the initial spans.)
	    buckets.push(Vec::from(input_covspans));

	    buckets
	}

	/// Similar to `.drain(..)`, but stops just before it would remove an item not
	/// satisfying the predicate.
	fn drain_front_while<'a, T>(
	queue: &'a mut VecDeque<T>,
	mut pred_fn: impl FnMut(&T) -> bool,
	) -> impl Iterator<Item = T> {
	iter::from_fn(move \|\| queue.pop_front_if(\|x\| pred_fn(x)))
	}

	/// Takes one of the buckets of (sorted) spans extracted from MIR, and "refines"
	/// those spans by removing spans that overlap in unwanted ways.
	///
	/// Returns the covspans that survive, in the order they were committed.
	#[instrument(level = "debug")]
	fn remove_unwanted_overlapping_spans(sorted_spans: Vec<Covspan>) -> Vec<Covspan> {
	    debug_assert!(sorted_spans.is_sorted_by(|a, b| compare_spans(a.span, b.span).is_le()));

	    // Holds spans that have been read from the input vector, but haven't yet
	    // been committed to the output vector.
	    let mut pending = vec![];
	    let mut refined = vec![];

	    for curr in sorted_spans {
	        pending.retain(|prev: &Covspan| {
	            if prev.span.hi() <= curr.span.lo() {
	                // There's no overlap between the previous/current covspans,
	                // so move the previous one into the refined list.
	                refined.push(prev.clone());
	                false
	            } else {
	                // Otherwise, retain the previous covspan only if it has the
	                // same BCB. This tends to discard long outer spans that enclose
	                // smaller inner spans with different control flow.
	                prev.bcb == curr.bcb
	            }
	        });
	        pending.push(curr);
	    }

	    // Drain the rest of the pending list into the refined list.
	    refined.extend(pending);
	    refined
	}

	/// A source span paired with the basic coverage block (BCB) it is attributed to.
	#[derive(Debug, Clone)]
	struct Covspan {
	    /// The source region covered by this covspan.
	    span: Span,
	    /// The BCB associated with `span`.
	    bcb: BasicCoverageBlock,
	}

	impl Covspan {
	    /// Merges `other` into `self` when both have the same BCB.
	    ///
	    /// On success `self.span` is widened to also include `other.span`, and
	    /// `true` is returned; otherwise `self` is left untouched and `false` is
	    /// returned.
	    ///
	    /// Note that compatible covspans are merged even if their underlying
	    /// spans are not overlapping/adjacent; any space between them becomes
	    /// part of the merged covspan.
	    fn merge_if_eligible(&mut self, other: &Self) -> bool {
	        let same_bcb = self.bcb == other.bcb;
	        if same_bcb {
	            self.span = self.span.to(other.span);
	        }
	        same_bcb
	    }
	}

	/// Compares two spans in (lo ascending, hi descending) order.
	fn compare_spans(a: Span, b: Span) -> std::cmp::Ordering {
	    // First sort by span start.
	    Ord::cmp(&a.lo(), &b.lo())
	        // If span starts are the same, sort by span end in reverse order.
	        // This ensures that if spans A and B are adjacent in the list,
	        // and they overlap but are not equal, then either:
	        // - Span A extends further left, or
	        // - Both have the same start and span A extends further right
	        .then_with(|| Ord::cmp(&a.hi(), &b.hi()).reverse())
	}