@@ -3695,6 +3695,10 @@ def set_gguf_parameters(self):
3695
3695
self .gguf_writer .add_causal_attention (False )
3696
3696
self ._try_set_pooling_type ()
3697
3697
3698
+ if cls_out_labels := self .hparams .get ("id2label" ):
3699
+ key_name = gguf .Keys .Classifier .OUTPUT_LABELS .format (arch = gguf .MODEL_ARCH_NAMES [self .model_arch ])
3700
+ self .gguf_writer .add_array (key_name , [v for k , v in sorted (cls_out_labels .items ())])
3701
+
3698
3702
def set_vocab (self ):
3699
3703
tokens , toktypes , tokpre = self .get_vocab_base ()
3700
3704
self .vocab_size = len (tokens )
@@ -3745,12 +3749,13 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
3745
3749
if name .startswith ("cls.seq_relationship" ):
3746
3750
return []
3747
3751
3748
- # For BertForSequenceClassification (direct projection layer)
3749
- if name == "classifier.weight" :
3750
- name = "classifier.out_proj.weight"
3752
+ if self .hparams .get ("id2label" ):
3753
+ # For BertForSequenceClassification (direct projection layer)
3754
+ if name == "classifier.weight" :
3755
+ name = "classifier.out_proj.weight"
3751
3756
3752
- if name == "classifier.bias" :
3753
- name = "classifier.out_proj.bias"
3757
+ if name == "classifier.bias" :
3758
+ name = "classifier.out_proj.bias"
3754
3759
3755
3760
return [(self .map_tensor_name (name ), data_torch )]
3756
3761
@@ -3846,7 +3851,7 @@ def _xlmroberta_set_vocab(self) -> None:
3846
3851
self .gguf_writer .add_add_eos_token (True )
3847
3852
3848
3853
3849
- @ModelBase .register ("RobertaModel" )
3854
+ @ModelBase .register ("RobertaModel" , "RobertaForSequenceClassification" )
3850
3855
class RobertaModel (BertModel ):
3851
3856
model_arch = gguf .MODEL_ARCH .BERT
3852
3857
0 commit comments