@@ -254,7 +254,6 @@ def _process_multimodal(
         mm_processor_kwargs: Optional[Mapping[str, object]],
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> MultiModalInputs:
         """
         Apply the model's multi-modal processor to a multi-modal prompt,
@@ -271,8 +270,7 @@ def _process_multimodal(
         return mm_processor.apply(prompt,
                                   mm_data,
                                   hf_processor_mm_kwargs=mm_processor_kwargs,
-                                  tokenization_kwargs=tokenization_kwargs,
-                                  return_mm_hashes=return_mm_hashes)
+                                  tokenization_kwargs=tokenization_kwargs)
 
     async def _process_multimodal_async(
         self,
@@ -281,7 +279,6 @@ async def _process_multimodal_async(
         mm_processor_kwargs: Optional[Mapping[str, object]],
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> MultiModalInputs:
         """
         Async version of
@@ -297,8 +294,7 @@ async def _process_multimodal_async(
         return mm_processor.apply(prompt,
                                   mm_data,
                                   hf_processor_mm_kwargs=mm_processor_kwargs,
-                                  tokenization_kwargs=tokenization_kwargs,
-                                  return_mm_hashes=return_mm_hashes)
+                                  tokenization_kwargs=tokenization_kwargs)
 
     def _process_embeds(
         self,
@@ -335,7 +331,6 @@ def _process_tokens(
         parsed_content: TokensPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> Union[TokenInputs, MultiModalInputs]:
         prompt_token_ids = parsed_content["prompt_token_ids"]
         token_type_ids = parsed_content.get("token_type_ids")
@@ -348,7 +343,6 @@ def _process_tokens(
                 parsed_content.get("mm_processor_kwargs"),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         else:
             inputs = token_inputs(
@@ -366,7 +360,6 @@ async def _process_tokens_async(
         parsed_content: TokensPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> Union[TokenInputs, MultiModalInputs]:
         prompt_token_ids = parsed_content["prompt_token_ids"]
         token_type_ids = parsed_content.get("token_type_ids")
@@ -379,7 +372,6 @@ async def _process_tokens_async(
                 parsed_content.get("mm_processor_kwargs"),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         else:
             inputs = token_inputs(
@@ -397,7 +389,6 @@ def _process_text(
         parsed_content: TextPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> Union[TokenInputs, MultiModalInputs]:
         prompt_text = parsed_content["prompt"]
 
@@ -409,7 +400,6 @@ def _process_text(
                 parsed_content.get("mm_processor_kwargs"),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         else:
             prompt_token_ids = self._tokenize_prompt(
@@ -432,7 +422,6 @@ async def _process_text_async(
         parsed_content: TextPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> Union[TokenInputs, MultiModalInputs]:
         prompt_text = parsed_content["prompt"]
 
@@ -444,7 +433,6 @@ async def _process_text_async(
                 parsed_content.get("mm_processor_kwargs"),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         else:
             prompt_token_ids = await self._tokenize_prompt_async(
@@ -467,7 +455,6 @@ def _prompt_to_llm_inputs(
         prompt: SingletonPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> SingletonInputs:
         """
         Extract the singleton inputs from a prompt.
@@ -476,7 +463,6 @@ def _prompt_to_llm_inputs(
 
         * prompt: single encoder or decoder input prompt
         * lora_request: this is only valid for decoder prompts
-        * return_mm_hashes: whether to return multimodal hashes
 
         Returns:
 
@@ -490,21 +476,18 @@ def _prompt_to_llm_inputs(
             return self._process_tokens(
                 parsed["content"],
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         if parsed["type"] == "text":
             return self._process_text(
                 parsed["content"],
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         if parsed["type"] == "str":
             return self._process_text(
                 TextPrompt(prompt=parsed["content"]),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
 
         assert_never(parsed)
@@ -514,7 +497,6 @@ async def _prompt_to_llm_inputs_async(
         prompt: SingletonPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> SingletonInputs:
         """
         Async version of
@@ -528,21 +510,18 @@ async def _prompt_to_llm_inputs_async(
             return await self._process_tokens_async(
                 parsed["content"],
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         if parsed["type"] == "text":
             return await self._process_text_async(
                 parsed["content"],
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         if parsed["type"] == "str":
             return await self._process_text_async(
                 TextPrompt(prompt=parsed["content"]),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
 
         assert_never(parsed)
@@ -785,7 +764,6 @@ def _process_decoder_only_prompt(
         prompt: SingletonPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> DecoderOnlyInputs:
         """
         For decoder-only models:
@@ -796,7 +774,6 @@ def _process_decoder_only_prompt(
 
         * prompt: input prompt
         * lora_request
-        * return_mm_hashes
 
         Returns:
 
@@ -807,7 +784,6 @@ def _process_decoder_only_prompt(
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             lora_request=lora_request,
-            return_mm_hashes=return_mm_hashes,
         )
 
         return self._build_decoder_only_llm_inputs(prompt_comps)
@@ -817,7 +793,6 @@ async def _process_decoder_only_prompt_async(
         prompt: SingletonPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> DecoderOnlyInputs:
         """
         Async version of
@@ -827,7 +802,6 @@ async def _process_decoder_only_prompt_async(
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             lora_request=lora_request,
-            return_mm_hashes=return_mm_hashes,
         )
 
         return self._build_decoder_only_llm_inputs(prompt_comps)
@@ -837,17 +811,15 @@ def preprocess(
         prompt: PromptType,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> ProcessorInputs:
         """Preprocess the input prompt."""
         if self.model_config.is_encoder_decoder:
-            assert not return_mm_hashes, (
-                "Multimodal hashes for encoder-decoder models should not be ",
-                "returned until they are supported on vLLM V1.")
             # Encoder-decoder model requires special mapping of
-            # input prompts to encoder & decoder
+            # input prompts to encoder & decoder.
             return self._process_encoder_decoder_prompt(
-                prompt, tokenization_kwargs)
+                prompt,
+                tokenization_kwargs,
+            )
 
         if is_explicit_encoder_decoder_prompt(prompt):
             raise ValueError("Cannot pass encoder-decoder prompt "
@@ -858,27 +830,25 @@ def preprocess(
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             lora_request=lora_request,
-            return_mm_hashes=return_mm_hashes,
         )
 
     async def preprocess_async(
         self,
         prompt: PromptType,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> ProcessorInputs:
         """
         Async version of
         [`preprocess`][vllm.inputs.preprocess.InputPreprocessor.preprocess].
         """
         if self.model_config.is_encoder_decoder:
-            assert not return_mm_hashes, (
-                "Multimodal hashes for encoder-decoder models should not be ",
-                "returned until they are supported on vLLM V1.")
             # Encoder-decoder model requires special mapping of
-            # input prompts to encoder & decoder
-            return await self._process_encoder_decoder_prompt_async(prompt)
+            # input prompts to encoder & decoder.
+            return await self._process_encoder_decoder_prompt_async(
+                prompt,
+                tokenization_kwargs,
+            )
 
         if is_explicit_encoder_decoder_prompt(prompt):
             raise ValueError("Cannot pass encoder-decoder prompt "
@@ -889,5 +859,4 @@ async def preprocess_async(
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             lora_request=lora_request,
-            return_mm_hashes=return_mm_hashes,
         )