@@ -62,6 +62,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_DEEPSEEK2,        "deepseek2"    },
     { LLM_ARCH_CHATGLM,          "chatglm"      },
     { LLM_ARCH_GLM4,             "glm4"         },
+    { LLM_ARCH_GLM4_MOE,         "glm4moe"      },
     { LLM_ARCH_BITNET,           "bitnet"       },
     { LLM_ARCH_T5,               "t5"           },
     { LLM_ARCH_T5ENCODER,        "t5encoder"    },
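A minimal standalone sketch, assuming a hypothetical `arch_name` helper and stand-in enum values (the real lookup in llama.cpp may differ), of how an arch enum resolves to the GGUF architecture string listed in this table, with a fallback to "(unknown)":

```cpp
// Sketch only: map an architecture enum to its GGUF name string.
#include <cstdio>
#include <map>

enum llm_arch_sketch { ARCH_GLM4_MOE, ARCH_OPENAI_MOE, ARCH_UNKNOWN };

static const std::map<llm_arch_sketch, const char *> ARCH_NAMES = {
    { ARCH_GLM4_MOE,   "glm4moe"   },
    { ARCH_OPENAI_MOE, "gpt-oss"   },
    { ARCH_UNKNOWN,    "(unknown)" },
};

// Hypothetical helper: unknown enum values fall back to "(unknown)".
static const char * arch_name(llm_arch_sketch arch) {
    auto it = ARCH_NAMES.find(arch);
    return it == ARCH_NAMES.end() ? "(unknown)" : it->second;
}

int main() {
    std::printf("%s\n", arch_name(ARCH_GLM4_MOE)); // prints "glm4moe"
}
```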
@@ -85,9 +86,13 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_ERNIE4_5,         "ernie4_5"      },
     { LLM_ARCH_ERNIE4_5_MOE,     "ernie4_5-moe"  },
     { LLM_ARCH_HUNYUAN_MOE,      "hunyuan-moe"   },
+    { LLM_ARCH_HUNYUAN_DENSE,    "hunyuan-dense" },
     { LLM_ARCH_SMOLLM3,          "smollm3"       },
+    { LLM_ARCH_OPENAI_MOE,       "gpt-oss"       },
     { LLM_ARCH_LFM2,             "lfm2"          },
     { LLM_ARCH_DREAM,            "dream"         },
+    { LLM_ARCH_SMALLTHINKER,     "smallthinker"  },
+    { LLM_ARCH_LLADA,            "llada"         },
     { LLM_ARCH_UNKNOWN,          "(unknown)"     },
 };
 
@@ -124,6 +129,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_EXPERT_WEIGHTS_NORM,        "%s.expert_weights_norm"        },
     { LLM_KV_EXPERT_GATING_FUNC,         "%s.expert_gating_func"         },
     { LLM_KV_MOE_EVERY_N_LAYERS,         "%s.moe_every_n_layers"         },
+    { LLM_KV_NEXTN_PREDICT_LAYERS,       "%s.nextn_predict_layers"       },
     { LLM_KV_POOLING_TYPE,               "%s.pooling_type"               },
     { LLM_KV_LOGIT_SCALE,                "%s.logit_scale"                },
     { LLM_KV_DECODER_START_TOKEN_ID,     "%s.decoder_start_token_id"     },
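Each `LLM_KV_NAMES` entry is a printf-style template whose `%s` is the architecture name, so the new `LLM_KV_NEXTN_PREDICT_LAYERS` entry expands to a metadata key such as `glm4moe.nextn_predict_layers`. A minimal sketch of that expansion, assuming a hypothetical `format_kv` helper and a fixed-size buffer (not llama.cpp's actual formatting code):

```cpp
// Sketch only: expand a "%s.*" key template with the architecture name.
#include <cstdio>
#include <string>

static std::string format_kv(const char * tmpl, const char * arch_name) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), tmpl, arch_name);
    return buf;
}

int main() {
    // e.g. the key for a GLM4_MOE model
    std::printf("%s\n", format_kv("%s.nextn_predict_layers", "glm4moe").c_str());
}
```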
@@ -1388,6 +1394,40 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_POST_NORM,      "blk.%d.post_ffw_norm" },
         },
     },
+    {
+        LLM_ARCH_GLM4_MOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+            { LLM_TENSOR_OUTPUT,             "output" },
+            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_POST_NORM,     "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
+            { LLM_TENSOR_FFN_EXP_PROBS_B,    "blk.%d.exp_probs_b" },
+            // NextN/MTP tensors - preserved but unused (in final layer, dynamic layer number)
+            { LLM_TENSOR_NEXTN_EH_PROJ,          "blk.%d.nextn.eh_proj" },
+            { LLM_TENSOR_NEXTN_EMBED_TOKENS,     "blk.%d.nextn.embed_tokens" },
+            { LLM_TENSOR_NEXTN_ENORM,            "blk.%d.nextn.enorm" },
+            { LLM_TENSOR_NEXTN_HNORM,            "blk.%d.nextn.hnorm" },
+            { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.%d.nextn.shared_head_head" },
+            { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.%d.nextn.shared_head_norm" },
+        },
+    },
     {
         LLM_ARCH_BITNET,
         {
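The per-block tensor names above are templates in which `%d` stands for the block index; per the NextN/MTP comment in the GLM4_MOE table, those tensors appear only in the final block, so they get formatted with the last layer's index. A small sketch of the expansion, assuming a hypothetical `tensor_name` helper and a made-up layer count of 47:

```cpp
// Sketch only: expand a per-block tensor name template with the block index.
#include <cstdio>
#include <string>

static std::string tensor_name(const char * tmpl, int blk) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), tmpl, blk);
    return buf;
}

int main() {
    // A regular repeating-layer tensor in block 0.
    std::printf("%s\n", tensor_name("blk.%d.attn_q", 0).c_str());         // blk.0.attn_q
    // A NextN/MTP tensor, which would live only in the last block
    // (index 46 for a hypothetical 47-block model).
    std::printf("%s\n", tensor_name("blk.%d.nextn.eh_proj", 46).c_str()); // blk.46.nextn.eh_proj
}
```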
@@ -1895,6 +1935,26 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP_EXPS,    "blk.%d.ffn_up_exps" },
         },
     },
+    {
+        LLM_ARCH_HUNYUAN_DENSE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
+            { LLM_TENSOR_OUTPUT,         "output" },
+            { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM,    "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM,    "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,       "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,       "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,       "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,       "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
+
+        },
+    },
     {
         LLM_ARCH_SMOLLM3,
         {
@@ -1912,6 +1972,25 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_OPENAI_MOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
+            { LLM_TENSOR_OUTPUT,         "output" },
+            { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,       "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_SINKS,     "blk.%d.attn_sinks" },
+            { LLM_TENSOR_FFN_GATE_INP,   "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,  "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,  "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,    "blk.%d.ffn_up_exps" },
+        },
+    },
     {
         LLM_ARCH_LFM2,
         {
@@ -1933,6 +2012,27 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
         }
     },
+    {
+        LLM_ARCH_SMALLTHINKER,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
+            { LLM_TENSOR_OUTPUT,         "output" },
+            { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,       "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,       "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,       "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,       "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_INP,   "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,  "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,  "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,    "blk.%d.ffn_up_exps" }
+        },
+    },
     {
         LLM_ARCH_DREAM,
         {
@@ -1950,6 +2050,23 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_LLADA,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
+            { LLM_TENSOR_OUTPUT,         "output" },
+            { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,       "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,       "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,       "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,       "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
+        },
+    },
     {
         LLM_ARCH_UNKNOWN,
         {
@@ -1989,6 +2106,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_ATTN_KV_B,                  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_ATTN_K_B,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_ATTN_V_B,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_ATTN_SINKS,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SCALE}},
     {LLM_TENSOR_DEC_ATTN_Q,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_DEC_ATTN_K,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_DEC_ATTN_V,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
@@ -2120,6 +2238,14 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_SHORTCONV_CONV,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
     {LLM_TENSOR_SHORTCONV_INPROJ,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_SHORTCONV_OUTPROJ,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    // NextN/MTP tensors are currently ignored (reserved for future MTP support)
+    // These tensors only exist in the last layer(s) and are treated as output tensors
+    {LLM_TENSOR_NEXTN_EH_PROJ,              {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_NEXTN_EMBED_TOKENS,         {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
+    {LLM_TENSOR_NEXTN_ENORM,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
+    {LLM_TENSOR_NEXTN_HNORM,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
+    {LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
 };
 
 LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
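Each `llm_tensor_info` entry pairs the layer category a tensor belongs to (input, repeating, or output) with the ggml op it feeds; registering the NextN/MTP tensors as output-layer entries matches the comment above that they only exist in the last layer(s). A standalone sketch of that pairing using stand-in enums, not the real llama.cpp/ggml types, and with consuming logic that is purely illustrative:

```cpp
// Sketch only: a {layer category, op} record per tensor kind.
#include <cstdio>

enum layer_kind { LAYER_INPUT, LAYER_REPEATING, LAYER_OUTPUT };
enum op_kind    { OP_MUL_MAT, OP_GET_ROWS, OP_MUL, OP_SCALE };

struct tensor_info {
    layer_kind layer; // which part of the model the weight lives in
    op_kind    op;    // the op the weight participates in
};

int main() {
    // The NextN projection is recorded as an output-layer matmul weight.
    tensor_info nextn_eh_proj = { LAYER_OUTPUT, OP_MUL_MAT };
    std::printf("output-layer? %d, matmul? %d\n",
                nextn_eh_proj.layer == LAYER_OUTPUT,
                nextn_eh_proj.op    == OP_MUL_MAT);
}
```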
@@ -2202,6 +2328,7 @@ bool llm_arch_is_hybrid(const llm_arch & arch) {
 bool llm_arch_is_diffusion(const llm_arch & arch) {
     switch (arch) {
         case LLM_ARCH_DREAM:
+        case LLM_ARCH_LLADA:
             return true;
         default:
             return false;