Skip to content

Commit 36f852b

Browse files
committed
Initialize the sampler only once
1 parent be93ebb commit 36f852b

File tree

3 files changed: +12 −10 lines changed

libraries/YarpPlugins/LlamaGPT/DeviceDriverImpl.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,10 @@ bool LlamaGPT::open(yarp::os::Searchable & config)
  99   99         }
 100  100     }
 101  101
      102+    // initialize the sampler
      103+    smpl = llama_sampler_chain_init(llama_sampler_chain_default_params());
      104+    llama_sampler_chain_add(smpl, llama_sampler_init_greedy());
      105+
 102  106     return true;
 103  107 }
 104  108

@@ -108,6 +112,12 @@ bool LlamaGPT::close()
 108  112 {
 109  113     bool ret = deleteConversation();
 110  114
      115+    if (smpl)
      116+    {
      117+        llama_sampler_free(smpl);
      118+        smpl = nullptr;
      119+    }
      120+
 111  121     if (model)
 112  122     {
 113  123         llama_model_free(model);

libraries/YarpPlugins/LlamaGPT/ILLMImpl.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -167,15 +167,6 @@ bool LlamaGPT::ask(const std::string & question, yarp::dev::LLM_Message & answer
 167  167 #endif
 168  168     }
 169  169
 170-         // initialize the sampler
 171-
 172-         auto sparams = llama_sampler_chain_default_params();
 173-         sparams.no_perf = true;
 174-
 175-         llama_sampler * smpl = llama_sampler_chain_init(sparams);
 176-
 177-         llama_sampler_chain_add(smpl, llama_sampler_init_greedy());
 178-
 179  170     // prepare a batch for the prompt
 180  171     llama_batch batch = llama_batch_get_one(prompt_tokens.data(), prompt_tokens.size());
 181  172
@@ -228,7 +219,6 @@ bool LlamaGPT::ask(const std::string & question, yarp::dev::LLM_Message & answer
 228  219         }
 229  220     }
 230  221
 231-         llama_sampler_free(smpl);
 232  222     llama_free(ctx);
 233  223
 234  224     yCDebug(LLAMA) << "Generated:" << out;

libraries/YarpPlugins/LlamaGPT/LlamaGPT.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ class LlamaGPT : public yarp::dev::DeviceDriver,
  53   53
  54   54 private:
  55   55     llama_model * model {nullptr};
       56+    llama_sampler * smpl {nullptr};
       57+
  56   58     std::vector<llama_chat_message> conversation;
  57   59 };
  58   60

0 commit comments

Comments (0)