Merge commit 4d872c0 (2 parents: e4870dc + 4e5c114)
inference-modules/vllm/schemas.py

@@ -14,7 +14,7 @@ class ModelConfig(BaseModel):
     gpu_memory_utilization: float = 0.9
     tensor_parallel_size: int = 1
     pipeline_parallel_size: int = 1
-    max_model_len: int = 2048
+    max_model_len: int = 4096
     num_scheduler_steps: int = 8
     enable_prefix_caching: bool = True
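
For context, here is a minimal sketch of how the visible portion of this `ModelConfig` could be wired into a vLLM engine. It assumes Pydantic v2 and that the field names are forwarded directly as vLLM engine keyword arguments; the `build_engine` helper and `model_name` parameter are illustrative, not from this repository, and the class may have additional fields above the hunk shown in the diff.

```python
# Sketch only: reconstructed from the visible diff hunk, not the full schemas.py.
from pydantic import BaseModel
from vllm import LLM


class ModelConfig(BaseModel):
    gpu_memory_utilization: float = 0.9
    tensor_parallel_size: int = 1
    pipeline_parallel_size: int = 1
    max_model_len: int = 4096          # raised from 2048 in this commit
    num_scheduler_steps: int = 8       # multi-step scheduling; support varies by vLLM version
    enable_prefix_caching: bool = True


def build_engine(model_name: str, config: ModelConfig) -> LLM:
    # Hypothetical helper: the field names above match vLLM engine arguments,
    # so the config can be expanded directly into the LLM constructor.
    return LLM(model=model_name, **config.model_dump())
```

Raising the default `max_model_len` to 4096 lets the engine accept longer prompts/completions by default, at the cost of a larger KV-cache reservation per sequence.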