From 38995576cb260787c038ab5846ee8825b450b1b6 Mon Sep 17 00:00:00 2001 From: AlexanderPortland Date: Thu, 31 Jul 2025 10:51:46 -0700 Subject: [PATCH 1/3] do reachability for all harnesses before codegen --- .../compiler_interface.rs | 153 +++++++++++------- kani-compiler/src/kani_middle/reachability.rs | 20 +++ 2 files changed, 118 insertions(+), 55 deletions(-) diff --git a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs index ebd20f0d53f1..4b5988bd2b54 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs @@ -8,15 +8,16 @@ use crate::codegen_cprover_gotoc::GotocCtx; use crate::kani_middle::analysis; use crate::kani_middle::attributes::KaniAttributes; use crate::kani_middle::check_reachable_items; -use crate::kani_middle::codegen_units::{CodegenUnit, CodegenUnits}; +use crate::kani_middle::codegen_units::{CodegenUnit, CodegenUnits, Harness}; use crate::kani_middle::provide; -use crate::kani_middle::reachability::{collect_reachable_items, filter_crate_items}; +use crate::kani_middle::reachability::{ + ReachabilityInfo, collect_reachable_items, filter_crate_items, +}; use crate::kani_middle::transform::{BodyTransformation, GlobalPasses}; use crate::kani_queries::QueryDb; -use cbmc::RoundingMode; use cbmc::goto_program::Location; use cbmc::irep::goto_binary_serde::write_goto_binary_file; -use cbmc::{InternedString, MachineModel}; +use cbmc::{InternedString, MachineModel, RoundingMode}; use kani_metadata::artifact::convert_type; use kani_metadata::{ArtifactType, HarnessMetadata, KaniMetadata, UnsupportedFeature}; use kani_metadata::{AssignsContract, CompilerArtifactStub}; @@ -54,7 +55,6 @@ use tracing::{debug, info}; pub type UnsupportedConstructs = FxHashMap>; -#[derive(Clone)] pub struct GotocCodegenBackend { /// The query is shared with `KaniCompiler` and it is initialized as part of `rustc` /// initialization, which may happen after this object is created. @@ -74,26 +74,15 @@ impl GotocCodegenBackend { fn codegen_items<'tcx>( &self, tcx: TyCtxt<'tcx>, - starting_items: &[MonoItem], + mut reachability: ReachabilityInfo, symtab_goto: &Path, machine_model: &MachineModel, check_contract: Option, mut transformer: BodyTransformation, ) -> (GotocCtx<'tcx>, Vec, Option) { - // This runs reachability analysis before global passes are applied. - // - // Alternatively, we could run reachability only once after the global passes are applied - // and resolve the necessary dependencies inside the passes on the fly. This, however, has a - // disadvantage of not having a precomputed call graph for the global passes to use. The - // call graph could be used, for example, in resolving function pointer or vtable calls for - // global passes that need this. - let (mut items, call_graph) = with_timer( - || collect_reachable_items(tcx, &mut transformer, starting_items), - "codegen reachability analysis", - ); - // Retrieve all instances from the currently codegened items. - let instances = items + let instances = reachability + .reachable .iter() .filter_map(|item| match item { MonoItem::Fn(instance) => Some(*instance), @@ -110,16 +99,16 @@ impl GotocCodegenBackend { let any_pass_modified = global_passes.run_global_passes( &mut transformer, tcx, - starting_items, + &reachability.starting, instances, - call_graph, + reachability.call_graph, ); // Re-collect reachable items after global transformations were applied. This is necessary // since global pass could add extra calls to instrumentation. if any_pass_modified { - (items, _) = with_timer( - || collect_reachable_items(tcx, &mut transformer, starting_items), + (reachability.reachable, _) = with_timer( + || collect_reachable_items(tcx, &mut transformer, &reachability.starting), "codegen reachability analysis (second pass)", ); } @@ -128,12 +117,12 @@ impl GotocCodegenBackend { // https://rustc-dev-guide.rust-lang.org/conventions.html#naming-conventions let mut gcx = GotocCtx::new(tcx, (*self.queries.lock().unwrap()).clone(), machine_model, transformer); - check_reachable_items(gcx.tcx, &gcx.queries, &items); + check_reachable_items(gcx.tcx, &gcx.queries, &reachability.reachable); let contract_info = with_timer( || { // we first declare all items - for item in &items { + for item in &reachability.reachable { match *item { MonoItem::Fn(instance) => { gcx.call_with_panic_debug_info( @@ -154,7 +143,7 @@ impl GotocCodegenBackend { } // then we move on to codegen - for item in &items { + for item in &reachability.reachable { match *item { MonoItem::Fn(instance) => { gcx.call_with_panic_debug_info( @@ -178,7 +167,8 @@ impl GotocCodegenBackend { } } - check_contract.map(|check_id| gcx.handle_check_contract(check_id, &items)) + check_contract + .map(|check_id| gcx.handle_check_contract(check_id, &reachability.reachable)) }, "codegen", ); @@ -212,7 +202,7 @@ impl GotocCodegenBackend { } } - (gcx, items, contract_info) + (gcx, reachability.reachable, contract_info) } /// Given a contract harness, get the DefId of its target. @@ -237,6 +227,30 @@ impl GotocCodegenBackend { } } +pub type HarnessWithReachable<'a> = (&'a Harness, ReachabilityInfo); + +/// Returns a function that can handle reachability analysis for all harnesses in a given unit, +/// using the given QueryDb and TyCtxt. +/// +/// (This could also just be done as a big closure inside of `codegen_crate` where we +/// generate `ordered_harnesses` with the nested map, but this seemed cleaner.) +fn reachability_analysis_fn_for_harness<'a>( + unit: &CodegenUnit, + queries: &QueryDb, + tcx: TyCtxt, +) -> impl Fn(&'a Harness) -> (HarnessWithReachable<'a>, BodyTransformation) { + move |harness: &'a Harness| { + let mut transformer = BodyTransformation::new(queries, tcx, unit); + + let info = with_timer( + || ReachabilityInfo::generate_from(tcx, &mut transformer, vec![MonoItem::Fn(*harness)]), + "codegen reachability analysis", + ); + + ((harness, info), transformer) + } +} + impl CodegenBackend for GotocCodegenBackend { fn provide(&self, providers: &mut Providers) { provide::provide(providers, &self.queries.lock().unwrap()); @@ -329,38 +343,64 @@ impl CodegenBackend for GotocCodegenBackend { let base_filename = base_filepath.as_path(); let reachability = queries.args().reachability_analysis; let mut results = GotoCodegenResults::new(tcx, reachability); + match reachability { ReachabilityType::AllFns | ReachabilityType::Harnesses => { let mut units = CodegenUnits::new(&queries, tcx); let mut modifies_instances = vec![]; let mut loop_contracts_instances = vec![]; - // Cross-crate collecting of all items that are reachable from the crate harnesses. - for unit in units.iter() { - // We reset the body cache for now because each codegen unit has different - // configurations that affect how we transform the instance body. - for harness in &unit.harnesses { - let transformer = BodyTransformation::new(&queries, tcx, unit); - let model_path = units.harness_model_path(*harness).unwrap(); - let is_automatic_harness = units.is_automatic_harness(harness); - let contract_metadata = - self.target_def_id_for_harness(tcx, harness, is_automatic_harness); - let (gcx, items, contract_info) = self.codegen_items( - tcx, - &[MonoItem::Fn(*harness)], - model_path, - &results.machine_model, - contract_metadata, - transformer, - ); - if gcx.has_loop_contracts { - loop_contracts_instances.push(*harness); - } - results.extend(gcx, items, None); - if let Some(assigns_contract) = contract_info { - modifies_instances.push((*harness, assigns_contract)); - } + + // First, do cross-crate collection of all items that are reachable from each harness. The resulting + // iterator has the reachability result for each harness, but also the transformer that harness used so + // we can reuse it during codegen. + let harness_reachability = units + .iter() + .map(|unit| { + unit.harnesses + .iter() + .map(reachability_analysis_fn_for_harness(unit, &queries, tcx)) + .collect::>() + }) + .flatten(); + + // This runs reachability analysis before global passes are applied in `codegen_items`. + // + // Alternatively, we could run reachability only once after the global passes are applied + // and resolve the necessary dependencies inside the passes on the fly. This, however, has a + // disadvantage of not having a precomputed call graph for the global passes to use. The + // call graph could be used, for example, in resolving function pointer or vtable calls for + // global passes that need this. + + // Then, actually codegen those reachable items for each. + for ((harness, reachability), transformer) in harness_reachability { + let harness_start = std::time::Instant::now(); + let model_path = units.harness_model_path(*harness).unwrap(); + let is_automatic_harness = units.is_automatic_harness(harness); + let contract_metadata = + self.target_def_id_for_harness(tcx, harness, is_automatic_harness); + + let (min_gcx, items, contract_info) = self.codegen_items( + tcx, + reachability, + model_path, + &results.machine_model, + contract_metadata, + transformer, + ); + if min_gcx.has_loop_contracts { + loop_contracts_instances.push(*harness); } + results.extend(min_gcx, items, None); + if let Some(assigns_contract) = contract_info { + modifies_instances.push((*harness, assigns_contract)); + } + println!( + "working on harness {:?} took {:?}", + harness.trimmed_name(), + harness_start.elapsed() + ); } + units.store_modifies(&modifies_instances); units.store_loop_contracts(&loop_contracts_instances); units.write_metadata(&queries, tcx); @@ -368,7 +408,9 @@ impl CodegenBackend for GotocCodegenBackend { ReachabilityType::None => {} ReachabilityType::PubFns => { let unit = CodegenUnit::default(); - let transformer = BodyTransformation::new(&queries, tcx, &unit); + let mut transformer = BodyTransformation::new(&queries, tcx, &unit); + + // Find local reachable functions as the entrypoints for reachability analysis. let main_instance = rustc_public::entry_fn() .map(|main_fn| Instance::try_from(main_fn).unwrap()); let local_reachable = filter_crate_items(tcx, |_, instance| { @@ -378,10 +420,11 @@ impl CodegenBackend for GotocCodegenBackend { .into_iter() .map(MonoItem::Fn) .collect::>(); + let model_path = base_filename.with_extension(ArtifactType::SymTabGoto); let (gcx, items, contract_info) = self.codegen_items( tcx, - &local_reachable, + ReachabilityInfo::generate_from(tcx, &mut transformer, local_reachable), &model_path, &results.machine_model, Default::default(), diff --git a/kani-compiler/src/kani_middle/reachability.rs b/kani-compiler/src/kani_middle/reachability.rs index da334ba41054..ec4cdad5bf9f 100644 --- a/kani-compiler/src/kani_middle/reachability.rs +++ b/kani-compiler/src/kani_middle/reachability.rs @@ -73,6 +73,26 @@ pub fn collect_reachable_items( (sorted_items, collector.call_graph) } +pub struct ReachabilityInfo { + /// The initial items used as entrypoints for reachability analysis. + pub starting: Vec, + /// All the items reachability analysis determined to be reachable from the `starting` items. + pub reachable: Vec, + pub call_graph: CallGraph, +} + +impl ReachabilityInfo { + pub fn generate_from( + tcx: TyCtxt, + transformer: &mut BodyTransformation, + starting_items: Vec, + ) -> Self { + let (reachable_items, call_graph) = + collect_reachable_items(tcx, transformer, &starting_items); + ReachabilityInfo { starting: starting_items, reachable: reachable_items, call_graph } + } +} + /// Collect all (top-level) items in the crate that matches the given predicate. /// An item can only be a root if they are a non-generic function. pub fn filter_crate_items(tcx: TyCtxt, predicate: F) -> Vec From 96e9c30035132b75161fac7d9585d37a895a7cf7 Mon Sep 17 00:00:00 2001 From: AlexanderPortland Date: Thu, 31 Jul 2025 11:29:30 -0700 Subject: [PATCH 2/3] apply heuristics to order harness codegen --- .../compiler_interface.rs | 23 +++---- .../src/codegen_cprover_gotoc/mod.rs | 2 +- .../src/kani_middle/codegen_order.rs | 65 +++++++++++++++++++ kani-compiler/src/kani_middle/mod.rs | 1 + 4 files changed, 79 insertions(+), 12 deletions(-) create mode 100644 kani-compiler/src/kani_middle/codegen_order.rs diff --git a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs index 4b5988bd2b54..d5ceac0cf9b6 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs @@ -8,6 +8,7 @@ use crate::codegen_cprover_gotoc::GotocCtx; use crate::kani_middle::analysis; use crate::kani_middle::attributes::KaniAttributes; use crate::kani_middle::check_reachable_items; +use crate::kani_middle::codegen_order::HeuristicOrderable; use crate::kani_middle::codegen_units::{CodegenUnit, CodegenUnits, Harness}; use crate::kani_middle::provide; use crate::kani_middle::reachability::{ @@ -15,9 +16,10 @@ use crate::kani_middle::reachability::{ }; use crate::kani_middle::transform::{BodyTransformation, GlobalPasses}; use crate::kani_queries::QueryDb; +use cbmc::RoundingMode; use cbmc::goto_program::Location; use cbmc::irep::goto_binary_serde::write_goto_binary_file; -use cbmc::{InternedString, MachineModel, RoundingMode}; +use cbmc::{InternedString, MachineModel}; use kani_metadata::artifact::convert_type; use kani_metadata::{ArtifactType, HarnessMetadata, KaniMetadata, UnsupportedFeature}; use kani_metadata::{AssignsContract, CompilerArtifactStub}; @@ -353,15 +355,14 @@ impl CodegenBackend for GotocCodegenBackend { // First, do cross-crate collection of all items that are reachable from each harness. The resulting // iterator has the reachability result for each harness, but also the transformer that harness used so // we can reuse it during codegen. - let harness_reachability = units - .iter() - .map(|unit| { - unit.harnesses - .iter() - .map(reachability_analysis_fn_for_harness(unit, &queries, tcx)) - .collect::>() - }) - .flatten(); + let ordered_harnesses = units.iter().map(|unit| { + unit.harnesses + .iter() + .map(reachability_analysis_fn_for_harness(unit, &queries, tcx)) + .collect::>() + }) + .apply_ordering_heuristic::() + .flatten(); // This runs reachability analysis before global passes are applied in `codegen_items`. // @@ -372,7 +373,7 @@ impl CodegenBackend for GotocCodegenBackend { // global passes that need this. // Then, actually codegen those reachable items for each. - for ((harness, reachability), transformer) in harness_reachability { + for ((harness, reachability), transformer) in ordered_harnesses { let harness_start = std::time::Instant::now(); let model_path = units.harness_model_path(*harness).unwrap(); let is_automatic_harness = units.is_automatic_harness(harness); diff --git a/kani-compiler/src/codegen_cprover_gotoc/mod.rs b/kani-compiler/src/codegen_cprover_gotoc/mod.rs index 7454d748d660..7973edf35411 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/mod.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/mod.rs @@ -6,6 +6,6 @@ mod context; mod overrides; mod utils; -pub use compiler_interface::{GotocCodegenBackend, UnsupportedConstructs}; +pub use compiler_interface::{GotocCodegenBackend, HarnessWithReachable, UnsupportedConstructs}; pub use context::GotocCtx; pub use context::VtableCtx; diff --git a/kani-compiler/src/kani_middle/codegen_order.rs b/kani-compiler/src/kani_middle/codegen_order.rs new file mode 100644 index 000000000000..b5ff3befbeb9 --- /dev/null +++ b/kani-compiler/src/kani_middle/codegen_order.rs @@ -0,0 +1,65 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Utilities for optimizing the order of Kani codegen. +//! +//! When compiling with more than a single thread, the order in which we codegen harnesses can have +//! an non-negligable impact on performance. Specifically, if we handle harness that will generate +//! a lot of code near the end of compilation, the main compiler thread can get stuck waiting for +//! worker threads to export that code, slowing down overall compilation. +//! +//! This module currently provides a simple [MostReachableItems] heuristic to combat that, but more +//! complex heuristics might be able to improve on this or avoid other kinds of pitfalls. + +use crate::{ + codegen_cprover_gotoc::HarnessWithReachable, kani_middle::transform::BodyTransformation, +}; + +/// Orders harnesses within a [CodegenUnit](crate::kani_middle::codegen_units::CodegenUnit) based on +/// **the raw number of items found during reachability analysis**, putting those with more first. +/// +/// The number of reachable items seems to be a good proxy for the amount of code we will generate and +/// thus how long both codegen and the goto file exporting will take. Putting the harnesses that will take +/// the longest first ensures that +pub struct MostReachableItems; + +impl CodegenHeuristic for MostReachableItems { + fn evaluate_harness(harness: &HarnessWithReachable) -> usize { + harness.1.reachable.len() + } +} + +pub trait CodegenHeuristic { + /// Evaluate and rate a harness based on the given heuristic (where *higher is better*). + fn evaluate_harness(harness: &HarnessWithReachable) -> usize; +} + +fn reorder_harnesses<'a, H: CodegenHeuristic>( + harnesses: &mut Vec<(HarnessWithReachable<'a>, BodyTransformation)>, +) { + // Sort is ascending by default, so `usize::MAX - ...` ensures higher rated harnesses come first. + // We don't care about stability, and for cheap heuristic fns like the one for `MostReachableItems`, + // caching isn't likely to make a difference. + harnesses.sort_unstable_by_key(|(harness, _)| usize::MAX - H::evaluate_harness(harness)); +} + +/// Simple trait extender to allow us to call `.apply_...()` on the right kind of iterators. +/// Could also just be implemented as a function, but this matches the iterator style now used +/// for reachability in `codegen_crate`. +pub trait HeuristicOrderable: Iterator { + fn apply_ordering_heuristic(self) -> impl Iterator; +} + +impl<'a, I> HeuristicOrderable for I +where + I: Iterator, BodyTransformation)>>, +{ + /// Apply an codegen ordering heuristic to an iterator over codegen units. + fn apply_ordering_heuristic(self) -> impl Iterator { + // Reorder harnesses within each codegen unit according to `T`. + self.map(|mut harnesses| { + reorder_harnesses::(&mut harnesses); + harnesses + }) + } +} diff --git a/kani-compiler/src/kani_middle/mod.rs b/kani-compiler/src/kani_middle/mod.rs index 3f19525c8dd7..ea3ad2b59d8f 100644 --- a/kani-compiler/src/kani_middle/mod.rs +++ b/kani-compiler/src/kani_middle/mod.rs @@ -24,6 +24,7 @@ use self::attributes::KaniAttributes; pub mod abi; pub mod analysis; pub mod attributes; +pub mod codegen_order; pub mod codegen_units; pub mod coercion; mod intrinsics; From 723003201aae5ee3b05f74a55434461bcf154558 Mon Sep 17 00:00:00 2001 From: AlexanderPortland Date: Thu, 31 Jul 2025 14:28:44 -0700 Subject: [PATCH 3/3] remove accidental print statement --- .../src/codegen_cprover_gotoc/compiler_interface.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs index d5ceac0cf9b6..55fd285d06d6 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs @@ -374,7 +374,6 @@ impl CodegenBackend for GotocCodegenBackend { // Then, actually codegen those reachable items for each. for ((harness, reachability), transformer) in ordered_harnesses { - let harness_start = std::time::Instant::now(); let model_path = units.harness_model_path(*harness).unwrap(); let is_automatic_harness = units.is_automatic_harness(harness); let contract_metadata = @@ -395,11 +394,6 @@ impl CodegenBackend for GotocCodegenBackend { if let Some(assigns_contract) = contract_info { modifies_instances.push((*harness, assigns_contract)); } - println!( - "working on harness {:?} took {:?}", - harness.trimmed_name(), - harness_start.elapsed() - ); } units.store_modifies(&modifies_instances);