
Commit fc45bb8

talk-llama : sync llama.cpp

ggml-ci

1 parent 33c3c2f

28 files changed: +1905, −283 lines

examples/talk-llama/llama-arch.cpp

Lines changed: 127 additions & 0 deletions
@@ -62,6 +62,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_DEEPSEEK2,     "deepseek2"     },
     { LLM_ARCH_CHATGLM,       "chatglm"       },
     { LLM_ARCH_GLM4,          "glm4"          },
+    { LLM_ARCH_GLM4_MOE,      "glm4moe"       },
     { LLM_ARCH_BITNET,        "bitnet"        },
     { LLM_ARCH_T5,            "t5"            },
     { LLM_ARCH_T5ENCODER,     "t5encoder"     },
@@ -85,9 +86,13 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_ERNIE4_5,      "ernie4_5"      },
     { LLM_ARCH_ERNIE4_5_MOE,  "ernie4_5-moe"  },
     { LLM_ARCH_HUNYUAN_MOE,   "hunyuan-moe"   },
+    { LLM_ARCH_HUNYUAN_DENSE, "hunyuan-dense" },
     { LLM_ARCH_SMOLLM3,       "smollm3"       },
+    { LLM_ARCH_OPENAI_MOE,    "gpt-oss"       },
     { LLM_ARCH_LFM2,          "lfm2"          },
     { LLM_ARCH_DREAM,         "dream"         },
+    { LLM_ARCH_SMALLTHINKER,  "smallthinker"  },
+    { LLM_ARCH_LLADA,         "llada"         },
     { LLM_ARCH_UNKNOWN,       "(unknown)"     },
 };
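For context (an aside, not part of the diff): LLM_ARCH_NAMES is the table behind the arch/string helpers in llama-arch.cpp, so a new entry such as LLM_ARCH_GLM4_MOE is what lets "glm4moe" in GGUF metadata resolve to the right enum value. A minimal sketch of the lookup, with simplified helper names assumed for illustration:

// Sketch only: simplified lookups over LLM_ARCH_NAMES; the real helpers in
// llama-arch.cpp (llm_arch_name, llm_arch_from_string) follow the same idea.
static const char * arch_name_sketch(llm_arch arch) {
    auto it = LLM_ARCH_NAMES.find(arch);
    return it == LLM_ARCH_NAMES.end() ? "(unknown)" : it->second;
}

static llm_arch arch_from_string_sketch(const std::string & name) {
    for (const auto & kv : LLM_ARCH_NAMES) {
        if (kv.second == name) {
            return kv.first; // e.g. "glm4moe" -> LLM_ARCH_GLM4_MOE
        }
    }
    return LLM_ARCH_UNKNOWN;
}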

@@ -124,6 +129,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_EXPERT_WEIGHTS_NORM,    "%s.expert_weights_norm"    },
     { LLM_KV_EXPERT_GATING_FUNC,     "%s.expert_gating_func"     },
     { LLM_KV_MOE_EVERY_N_LAYERS,     "%s.moe_every_n_layers"     },
+    { LLM_KV_NEXTN_PREDICT_LAYERS,   "%s.nextn_predict_layers"   },
     { LLM_KV_POOLING_TYPE,           "%s.pooling_type"           },
     { LLM_KV_LOGIT_SCALE,            "%s.logit_scale"            },
     { LLM_KV_DECODER_START_TOKEN_ID, "%s.decoder_start_token_id" },
@@ -1388,6 +1394,40 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_POST_NORM,          "blk.%d.post_ffw_norm" },
         },
     },
+    {
+        LLM_ARCH_GLM4_MOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,             "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,            "output_norm" },
+            { LLM_TENSOR_OUTPUT,                 "output" },
+            { LLM_TENSOR_ATTN_NORM,              "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_POST_NORM,         "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_ATTN_Q,                 "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,                 "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,                 "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,               "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_Q_NORM,            "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K_NORM,            "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_FFN_GATE,               "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,               "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,                 "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_INP,           "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,          "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,          "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,            "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_FFN_GATE_SHEXP,         "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP,         "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP,           "blk.%d.ffn_up_shexp" },
+            { LLM_TENSOR_FFN_EXP_PROBS_B,        "blk.%d.exp_probs_b" },
+            // NextN/MTP tensors - preserved but unused (in final layer, dynamic layer number)
+            { LLM_TENSOR_NEXTN_EH_PROJ,          "blk.%d.nextn.eh_proj" },
+            { LLM_TENSOR_NEXTN_EMBED_TOKENS,     "blk.%d.nextn.embed_tokens" },
+            { LLM_TENSOR_NEXTN_ENORM,            "blk.%d.nextn.enorm" },
+            { LLM_TENSOR_NEXTN_HNORM,            "blk.%d.nextn.hnorm" },
+            { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.%d.nextn.shared_head_head" },
+            { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.%d.nextn.shared_head_norm" },
+        },
+    },
     {
         LLM_ARCH_BITNET,
         {
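Aside (not from the commit): the "blk.%d" strings above are printf-style patterns; the loader substitutes the block index, and a ".weight"/".bias" suffix is appended when the tensor is resolved from the GGUF file. A rough sketch of that expansion, with the helper name assumed for illustration rather than the real LLM_TN machinery:

// Rough sketch of per-layer tensor-name expansion; the real code goes
// through the LLM_TN helper rather than raw snprintf.
#include <cstdio>
#include <string>

static std::string tensor_name(const char * pattern, int block, const char * suffix) {
    char buf[256];
    snprintf(buf, sizeof(buf), pattern, block);
    return std::string(buf) + "." + suffix;
}

// tensor_name("blk.%d.ffn_gate_exps", 3, "weight") -> "blk.3.ffn_gate_exps.weight"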
@@ -1895,6 +1935,26 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP_EXPS,  "blk.%d.ffn_up_exps" },
         },
     },
+    {
+        LLM_ARCH_HUNYUAN_DENSE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,  "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT,      "output" },
+            { LLM_TENSOR_ATTN_NORM,   "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,      "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K,      "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V,      "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,    "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,    "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,    "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,    "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,      "blk.%d.ffn_up" },
+
+        },
+    },
     {
         LLM_ARCH_SMOLLM3,
         {
@@ -1912,6 +1972,25 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_OPENAI_MOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
+            { LLM_TENSOR_OUTPUT,         "output" },
+            { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,       "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_SINKS,     "blk.%d.attn_sinks" },
+            { LLM_TENSOR_FFN_GATE_INP,   "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,  "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,  "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,    "blk.%d.ffn_up_exps" },
+        },
+    },
     {
         LLM_ARCH_LFM2,
         {
@@ -1933,6 +2012,27 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
         }
     },
+    {
+        LLM_ARCH_SMALLTHINKER,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,    "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,   "output_norm" },
+            { LLM_TENSOR_OUTPUT,        "output" },
+            { LLM_TENSOR_ATTN_NORM,     "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,        "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,        "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,        "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,      "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,      "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,      "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,      "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,        "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_INP,  "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,   "blk.%d.ffn_up_exps" }
+        },
+    },
     {
         LLM_ARCH_DREAM,
         {
@@ -1950,6 +2050,23 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP,      "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_LLADA,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,  "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT,      "output" },
+            { LLM_TENSOR_ATTN_NORM,   "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,      "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,      "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,      "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,    "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,    "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,    "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,    "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,      "blk.%d.ffn_up" },
+        },
+    },
     {
         LLM_ARCH_UNKNOWN,
         {
@@ -1989,6 +2106,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_ATTN_KV_B,  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_ATTN_K_B,   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_ATTN_V_B,   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_ATTN_SINKS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SCALE}},
     {LLM_TENSOR_DEC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_DEC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_DEC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
@@ -2120,6 +2238,14 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_SHORTCONV_CONV,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
     {LLM_TENSOR_SHORTCONV_INPROJ,       {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_SHORTCONV_OUTPROJ,      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    // NextN/MTP tensors are currently ignored (reserved for future MTP support)
+    // These tensors only exist in the last layer(s) and are treated as output tensors
+    {LLM_TENSOR_NEXTN_EH_PROJ,          {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_NEXTN_EMBED_TOKENS,     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
+    {LLM_TENSOR_NEXTN_ENORM,            {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
+    {LLM_TENSOR_NEXTN_HNORM,            {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
+    {LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
 };
 
 LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
@@ -2202,6 +2328,7 @@ bool llm_arch_is_hybrid(const llm_arch & arch) {
 bool llm_arch_is_diffusion(const llm_arch & arch) {
     switch (arch) {
         case LLM_ARCH_DREAM:
+        case LLM_ARCH_LLADA:
             return true;
         default:
             return false;
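Reassembled from the hunk above for readability, the diffusion check now covers both diffusion architectures:

bool llm_arch_is_diffusion(const llm_arch & arch) {
    switch (arch) {
        case LLM_ARCH_DREAM:
        case LLM_ARCH_LLADA:
            return true;
        default:
            return false;
    }
}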

examples/talk-llama/llama-arch.h

Lines changed: 13 additions & 0 deletions
@@ -66,6 +66,7 @@ enum llm_arch {
     LLM_ARCH_DEEPSEEK2,
     LLM_ARCH_CHATGLM,
     LLM_ARCH_GLM4,
+    LLM_ARCH_GLM4_MOE,
     LLM_ARCH_BITNET,
     LLM_ARCH_T5,
     LLM_ARCH_T5ENCODER,
@@ -89,9 +90,13 @@ enum llm_arch {
     LLM_ARCH_ERNIE4_5,
     LLM_ARCH_ERNIE4_5_MOE,
     LLM_ARCH_HUNYUAN_MOE,
+    LLM_ARCH_HUNYUAN_DENSE,
     LLM_ARCH_SMOLLM3,
+    LLM_ARCH_OPENAI_MOE,
     LLM_ARCH_LFM2,
     LLM_ARCH_DREAM,
+    LLM_ARCH_SMALLTHINKER,
+    LLM_ARCH_LLADA,
     LLM_ARCH_UNKNOWN,
 };

@@ -128,6 +133,7 @@ enum llm_kv {
     LLM_KV_EXPERT_WEIGHTS_NORM,
     LLM_KV_EXPERT_GATING_FUNC,
     LLM_KV_MOE_EVERY_N_LAYERS,
+    LLM_KV_NEXTN_PREDICT_LAYERS,
     LLM_KV_POOLING_TYPE,
     LLM_KV_LOGIT_SCALE,
     LLM_KV_DECODER_START_TOKEN_ID,
@@ -260,6 +266,7 @@ enum llm_tensor {
     LLM_TENSOR_ATTN_OUT_NORM,
     LLM_TENSOR_ATTN_POST_NORM,
     LLM_TENSOR_ATTN_ROT_EMBD,
+    LLM_TENSOR_ATTN_SINKS,
     LLM_TENSOR_FFN_GATE_INP,
     LLM_TENSOR_FFN_GATE_INP_SHEXP,
     LLM_TENSOR_FFN_NORM,
@@ -406,6 +413,12 @@ enum llm_tensor {
     LLM_TENSOR_SHORTCONV_CONV,
     LLM_TENSOR_SHORTCONV_INPROJ,
     LLM_TENSOR_SHORTCONV_OUTPROJ,
+    LLM_TENSOR_NEXTN_EH_PROJ,
+    LLM_TENSOR_NEXTN_EMBED_TOKENS,
+    LLM_TENSOR_NEXTN_ENORM,
+    LLM_TENSOR_NEXTN_HNORM,
+    LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
+    LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
 };
 
 enum llm_tensor_layer {

examples/talk-llama/llama-batch.cpp

Lines changed: 2 additions & 2 deletions
@@ -59,7 +59,7 @@ bool llama_batch_allocr::init(
     for (int32_t i = 0; i < batch.n_tokens; ++i) {
         for (int32_t s = 0; s < batch.n_seq_id[i]; ++s) {
             if (batch.seq_id && (batch.seq_id[i][s] < 0 || batch.seq_id[i][s] >= (llama_seq_id) n_seq_max)) {
-                LLAMA_LOG_ERROR("%s: invalid seq_id[%d][%d] = %d > %d\n", __func__, i, s, batch.seq_id[i][s], (llama_seq_id) n_seq_max);
+                LLAMA_LOG_ERROR("%s: invalid seq_id[%d][%d] = %d >= %d\n", __func__, i, s, batch.seq_id[i][s], (llama_seq_id) n_seq_max);
                 return false;
             }
         }
@@ -477,7 +477,7 @@ llama_ubatch llama_batch_allocr::split_simple(uint32_t n_ubatch) {
 
 llama_ubatch llama_batch_allocr::split_equal(uint32_t n_ubatch, bool sequential) {
     if (sequential && has_cpl) {
-        LLAMA_LOG_ERROR("%s: sequential split is not supported when there are coupled sequences in the input batch\n", __func__);
+        LLAMA_LOG_ERROR("%s: sequential split is not supported when there are coupled sequences in the input batch (you may need to use the -kvu flag)\n", __func__);
 
         return {};
     }
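For context (a sketch, not part of the commit): the corrected seq_id message fires when a caller assigns a token to a sequence id outside [0, n_seq_max). A minimal caller that stays within the limit, using the public llama_batch API from llama.h:

// Sketch: a one-token batch on sequence 0. The n_seq_max passed to
// llama_batch_init should match the limit the context was created with
// (llama_context_params::n_seq_max).
llama_batch batch = llama_batch_init(/*n_tokens_alloc =*/ 8, /*embd =*/ 0, /*n_seq_max =*/ 1);
batch.n_tokens     = 1;
batch.token[0]     = token_id;   // token_id: some previously tokenized id
batch.pos[0]       = 0;
batch.n_seq_id[0]  = 1;
batch.seq_id[0][0] = 0;          // must satisfy 0 <= seq_id < n_seq_max
batch.logits[0]    = true;
// ... llama_decode(ctx, batch); llama_batch_free(batch);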

examples/talk-llama/llama-chat.cpp

Lines changed: 34 additions & 4 deletions
@@ -66,6 +66,8 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "llama4",        LLM_CHAT_TEMPLATE_LLAMA4        },
     { "smolvlm",       LLM_CHAT_TEMPLATE_SMOLVLM       },
     { "hunyuan-moe",   LLM_CHAT_TEMPLATE_HUNYUAN_MOE   },
+    { "gpt-oss",       LLM_CHAT_TEMPLATE_OPENAI_MOE    },
+    { "hunyuan-dense", LLM_CHAT_TEMPLATE_HUNYUAN_DENSE },
     { "kimi-k2",       LLM_CHAT_TEMPLATE_KIMI_K2       },
 };

@@ -191,8 +193,12 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_LLAMA4;
     } else if (tmpl_contains("<|endofuserprompt|>")) {
         return LLM_CHAT_TEMPLATE_DOTS1;
-    } else if (tmpl_contains("<|startoftext|>") && tmpl_contains("<|extra_4|>")) {
+    } else if (tmpl_contains("<|extra_0|>") && tmpl_contains("<|extra_4|>")) {
         return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
+    } else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) {
+        return LLM_CHAT_TEMPLATE_OPENAI_MOE;
+    } else if (tmpl_contains("<|hy_Assistant|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) {
+        return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE;
     } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
         return LLM_CHAT_TEMPLATE_KIMI_K2;
     }
@@ -619,8 +625,6 @@ int32_t llm_chat_apply_template(
     } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
         // Yandex template ("\n\n" is defined as EOT token)
 
-        ss << "<s>";
-
         for (size_t i = 0; i < chat.size(); i++) {
             std::string role(chat[i]->role);
             if (role == "user") {
@@ -698,11 +702,37 @@ int32_t llm_chat_apply_template(
             if (role == "system") {
                 ss << "<|startoftext|>" << message->content << "<|extra_4|>";
             } else if (role == "assistant") {
-                ss << "<|startoftext|>" << message->content << "<|eos|>";
+                ss << message->content << "<|eos|>";
             } else {
                 ss << "<|startoftext|>" << message->content << "<|extra_0|>";
             }
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENAI_MOE) {
+        // OpenAI MoE (based on Harmony chat template)
+        for (auto message : chat) {
+            std::string role(message->role);
+            ss << "<|start|>" << role << "<|message|>" << message->content;
+            ss << (role == "assistant" ? "<|return|>" : "<|end|>");
+        }
+        if (add_ass) {
+            ss << "<|start|>assistant";
+        }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_DENSE) {
+        // tencent/Hunyuan-4B-Instruct
+        for (size_t i = 0; i < chat.size(); i++) {
+            std::string role(chat[i]->role);
+            if (i == 0) {
+                if (role == "system") {
+                    ss << chat[i]->content << "<|hy_place▁holder▁no▁3|>";
+                }
+            }
+
+            if (role == "assistant") {
+                ss << "<|hy_Assistant|>" << chat[i]->content << "<|hy_place▁holder▁no▁2|>";
+            } else if (role == "user") {
+                ss << "<|hy_User|>" << chat[i]->content << "<|hy_Assistant|>";
+            }
+        }
     } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
         // moonshotai/Kimi-K2-Instruct
         for (auto message : chat) {
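To make the new gpt-oss branch concrete (a usage sketch, not part of the commit; it assumes the public llama_chat_apply_template helper from llama.h with its current model-free signature):

#include "llama.h"
#include <cstdio>
#include <vector>

int main() {
    const llama_chat_message chat[] = {
        { "system", "You are helpful." },
        { "user",   "Hi" },
    };
    std::vector<char> buf(512);
    // "gpt-oss" is resolved through LLM_CHAT_TEMPLATES to LLM_CHAT_TEMPLATE_OPENAI_MOE
    const int32_t n = llama_chat_apply_template("gpt-oss", chat, 2, /*add_ass =*/ true,
                                                buf.data(), (int32_t) buf.size());
    if (n > 0) {
        printf("%.*s\n", n, buf.data());
        // expected output, per the branch above:
        // <|start|>system<|message|>You are helpful.<|end|><|start|>user<|message|>Hi<|end|><|start|>assistant
    }
    return 0;
}

For hunyuan-dense, the same two-message chat would render as "You are helpful.<|hy_place▁holder▁no▁3|><|hy_User|>Hi<|hy_Assistant|>"; the trailing <|hy_Assistant|> emitted after each user turn already serves as the assistant prompt, so that branch ignores add_ass.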

examples/talk-llama/llama-chat.h

Lines changed: 2 additions & 0 deletions
@@ -46,6 +46,8 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_SMOLVLM,
     LLM_CHAT_TEMPLATE_DOTS1,
     LLM_CHAT_TEMPLATE_HUNYUAN_MOE,
+    LLM_CHAT_TEMPLATE_OPENAI_MOE,
+    LLM_CHAT_TEMPLATE_HUNYUAN_DENSE,
     LLM_CHAT_TEMPLATE_KIMI_K2,
     LLM_CHAT_TEMPLATE_UNKNOWN,
 };
