@@ -4,6 +4,12 @@
 #include "audio_utils.hpp"
 #include "openvino/genai/whisper_pipeline.hpp"
 
+auto get_config_for_cache() {
+    ov::AnyMap config;
+    config.insert({ov::cache_dir("whisper_cache")});
+    return config;
+}
+
 int main(int argc, char* argv[]) try {
     if (argc < 3 || argc > 4) {
         throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> \"<WAV_FILE_PATH>\" <DEVICE>");
@@ -13,7 +19,14 @@ int main(int argc, char* argv[]) try {
     std::string wav_file_path = argv[2];
     std::string device = (argc == 4) ? argv[3] : "CPU"; // Default to CPU if no device is provided
 
-    ov::genai::WhisperPipeline pipeline(models_path, device);
+    ov::AnyMap ov_config;
+    if (device == "NPU" || device.find("GPU") != std::string::npos) { // need to handle cases like "GPU", "GPU.0" and "GPU.1"
+        // Cache compiled models on disk for GPU and NPU to save time on the
+        // next run. It's not beneficial for CPU.
Collaborator:

Why is it not beneficial for CPU?

@luke-lin-vmc (Contributor, Author), Sep 23, 2025:

  1. This comment is simply copied from the reference sample code.
  2. AFAIK, the CPU plugin's "compile" step is mostly graph rewrites and primitive selection. It typically takes milliseconds to a few hundred milliseconds, not the seconds to minutes seen on GPU/NPU.
  3. Most importantly, enabling model caching on CPU causes the Whisper pipeline to crash. This looks like a bug that needs further investigation, so model caching is currently enabled only on GPU and NPU to avoid the issue.
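
For context, a minimal timing sketch of the effect described above, using the same pipeline constructor and cache property as this PR. It assumes openvino_genai is installed and an exported Whisper model sits in "whisper-base" (the model path and device are placeholders, not part of the PR). The first run of the binary compiles the model and fills the cache; later runs should construct noticeably faster on GPU/NPU:

#include <chrono>
#include <iostream>

#include "openvino/genai/whisper_pipeline.hpp"

int main() {
    // Same cache property the PR sets: compiled blobs land in "whisper_cache".
    ov::AnyMap ov_config;
    ov_config.insert({ov::cache_dir("whisper_cache")});

    // Time pipeline construction; compare the first run (cold cache)
    // against later runs (warm cache) of this binary.
    const auto start = std::chrono::steady_clock::now();
    ov::genai::WhisperPipeline pipeline("whisper-base", "GPU", ov_config);
    const std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - start;
    std::cout << "Pipeline construction took " << elapsed.count() << " s\n";
}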

+        ov_config = get_config_for_cache();
+    }
+
+    ov::genai::WhisperPipeline pipeline(models_path, device, ov_config);
Comment on lines +22 to +29

Copilot AI, Oct 16, 2025:

The condition enables caching for GPU variants but misses NPU variants like "NPU.0", restricting caching contrary to the stated intent. Adjust it to also detect NPU substrings: if (device.find("GPU") != std::string::npos || device.find("NPU") != std::string::npos) { ... }.
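
A self-contained sketch of that suggestion; the predicate name wants_model_cache is hypothetical, introduced only to exercise the condition against suffixed device names:

#include <iostream>
#include <string>
#include <vector>

// Substring matching covers bare and suffixed device names alike
// ("GPU", "GPU.0", "GPU.1", "NPU", "NPU.0").
bool wants_model_cache(const std::string& device) {
    return device.find("GPU") != std::string::npos ||
           device.find("NPU") != std::string::npos;
}

int main() {
    const std::vector<std::string> devices = {"CPU", "GPU", "GPU.1", "NPU", "NPU.0"};
    for (const auto& device : devices) {
        std::cout << device << ": caching " << (wants_model_cache(device) ? "on" : "off") << "\n";
    }
}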

     ov::genai::WhisperGenerationConfig config = pipeline.get_generation_config();
     // 'task' and 'language' parameters are supported for multilingual models only
@@ -11,6 +11,10 @@ def read_wav(filepath):
     raw_speech, samplerate = librosa.load(filepath, sr=16000)
     return raw_speech.tolist()
 
+def get_config_for_cache():
+    config_cache = dict()
+    config_cache["CACHE_DIR"] = "whisper_cache"
+    return config_cache

 def main():
     parser = argparse.ArgumentParser()
@@ -19,7 +23,13 @@ def main():
     parser.add_argument("device", nargs="?", default="CPU", help="Device to run the model on (default: CPU)")
     args = parser.parse_args()
 
-    pipe = openvino_genai.WhisperPipeline(args.model_dir, args.device)
+    ov_config = dict()
+    if args.device == "NPU" or "GPU" in args.device:  # need to handle cases like "GPU", "GPU.0" and "GPU.1"
+        # Cache compiled models on disk for GPU and NPU to save time on the
+        # next run. It's not beneficial for CPU.
+        ov_config = get_config_for_cache()
+
Comment on lines +26 to +31

Copilot AI, Oct 16, 2025:

The condition handles GPU variants (e.g. "GPU.0") but will skip NPU variants such as "NPU.0", limiting caching despite the PR's goal of enabling it for NPU. Update the condition to also match suffixed NPU forms, e.g. if 'GPU' in args.device or args.device.startswith('NPU'):. Alternatively, use substring checks for both: if 'GPU' in args.device or 'NPU' in args.device:.
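
A quick, self-contained sketch of the substring variant (wants_model_cache is again a hypothetical name, not part of the PR):

# Substring checks cover bare and suffixed device names
# ("GPU", "GPU.0", "NPU", "NPU.0"); plain CPU stays uncached.
def wants_model_cache(device: str) -> bool:
    return "GPU" in device or "NPU" in device

for device in ("CPU", "GPU", "GPU.1", "NPU", "NPU.0"):
    print(device, "->", "caching on" if wants_model_cache(device) else "caching off")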

+    pipe = openvino_genai.WhisperPipeline(args.model_dir, args.device, **ov_config)

     config = pipe.get_generation_config()
     # 'task' and 'language' parameters are supported for multilingual models only