@@ -230,3 +230,39 @@ class MockUnsupportedAsset:
230
230
231
231
with pytest .raises (ValueError , match = "Assets must be of type `Image`, `Video` or `Audio`" ):
232
232
adapter ._format_asset_for_template (unsupported_asset )
233
+
234
+
235
def test_transformers_multimodal_type_adapter_multiple_assets_in_single_item(adapter, image):
    """Reject a content item that carries more than one asset key.

    A single content dict holding both an ``image`` and a ``video`` entry
    is expected to make ``format_input`` raise ``ValueError``.
    """
    picture = Image(image)
    clip = Video("dummy_video")

    question = {"type": "text", "text": "What's in this?"}
    # One item deliberately mixes two asset types.
    overloaded_item = {"type": "image", "image": picture, "video": clip}
    user_message = {"role": "user", "content": [question, overloaded_item]}
    chat_prompt = Chat(messages=[user_message])

    with pytest.raises(ValueError, match="Found item with multiple keys:"):
        adapter.format_input(chat_prompt)
248
+
249
+
250
+
251
def test_transformers_multimodal_type_adapter_correct_multiple_assets_usage(adapter, image):
    """Accept several assets when each one sits in its own content item.

    Two image items are supplied as separate dictionaries; the formatted
    result must be a dict exposing ``text`` and exactly two ``images``.
    """
    first_picture = Image(image)
    second_picture = Image(image)

    # Correct usage: every asset gets its own dictionary in the content list.
    content = [
        {"type": "text", "text": "What's in these images?"},
        {"type": "image", "image": first_picture},
        {"type": "image", "image": second_picture},
    ]
    chat_prompt = Chat(messages=[{"role": "user", "content": content}])

    result = adapter.format_input(chat_prompt)

    assert isinstance(result, dict)
    for expected_key in ("text", "images"):
        assert expected_key in result
    assert len(result["images"]) == 2
0 commit comments