Skip to content

Commit 53a6f4a

Browse files
committed
fix
1 parent 66fdcc8 commit 53a6f4a

File tree

18 files changed

+24
-50
lines changed

18 files changed

+24
-50
lines changed

docs/source/inference/cli.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ fastvideo generate --help
2727
### Hardware Configuration
2828

2929
- `--num-gpus {NUM_GPUS}`: Number of GPUs to use
30-
- `--tp-size {TP_SIZE}`: Tensor parallelism size (Typically should match the number of GPUs)
30+
- `--tp-size {TP_SIZE}`: Tensor parallelism size (applies only to the encoder; should not be larger than 1 when text encoder offload is enabled, because layerwise offload + prefetch is faster)
3131
- `--sp-size {SP_SIZE}`: Sequence parallelism size (Typically should match the number of GPUs)
3232

3333
#### Video Configuration
@@ -68,7 +68,7 @@ Example configuration file (config.json):
6868
"output_path": "outputs/",
6969
"num_gpus": 2,
7070
"sp_size": 2,
71-
"tp_size": 2,
71+
"tp_size": 1,
7272
"num_frames": 45,
7373
"height": 720,
7474
"width": 1280,
@@ -102,7 +102,7 @@ prompt: "A beautiful woman in a red dress walking down a street"
102102
output_path: "outputs/"
103103
num_gpus: 2
104104
sp_size: 2
105-
tp_size: 2
105+
tp_size: 1
106106
num_frames: 45
107107
height: 720
108108
width: 1280

examples/training/finetune/wan_i2v_14b_480p/crush_smol/finetune_i2v.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ training_args=(
3030
parallel_args=(
3131
--num_gpus $NUM_GPUS
3232
--sp_size 8
33-
--tp_size 8
33+
--tp_size 1
3434
--hsdp_replicate_dim 1
3535
--hsdp_shard_dim 8
3636
)

examples/training/finetune/wan_i2v_14b_480p/crush_smol/finetune_i2v.slurm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ training_args=(
6666
parallel_args=(
6767
--num_gpus $NUM_GPUS
6868
--sp_size $NUM_GPUS
69-
--tp_size $NUM_GPUS
69+
--tp_size 1
7070
--hsdp_replicate_dim $SLURM_JOB_NUM_NODES
7171
--hsdp_shard_dim $NUM_GPUS
7272
)

examples/training/finetune/wan_t2v_1_3b/crush_smol/finetune_t2v.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ training_args=(
3030
parallel_args=(
3131
--num_gpus $NUM_GPUS
3232
--sp_size $NUM_GPUS
33-
--tp_size $NUM_GPUS
33+
--tp_size 1
3434
--hsdp_replicate_dim 1
3535
--hsdp_shard_dim $NUM_GPUS
3636
)

examples/training/finetune/wan_t2v_1_3b/crush_smol/finetune_t2v.slurm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ training_args=(
6363
parallel_args=(
6464
--num_gpus $NUM_GPUS
6565
--sp_size 4
66-
--tp_size 4
66+
--tp_size 1
6767
--hsdp_replicate_dim 2
6868
--hsdp_shard_dim 4
6969
)

fastvideo/v1/fastvideo_args.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def check_fastvideo_args(self) -> None:
292292
assert self.sp_size != -1, "sp_size must be set for training"
293293

294294
if self.tp_size == -1:
295-
self.tp_size = self.num_gpus
295+
self.tp_size = 1
296296
if self.sp_size == -1:
297297
self.sp_size = self.num_gpus
298298
if self.hsdp_shard_dim == -1:
@@ -305,11 +305,6 @@ def check_fastvideo_args(self) -> None:
305305
if self.num_gpus < max(self.tp_size, self.sp_size):
306306
self.num_gpus = max(self.tp_size, self.sp_size)
307307

308-
if self.tp_size != self.sp_size:
309-
raise ValueError(
310-
f"tp_size ({self.tp_size}) must be equal to sp_size ({self.sp_size})"
311-
)
312-
313308
if self.enable_torch_compile and self.num_gpus > 1:
314309
logger.warning(
315310
"Currently torch compile does not work with multi-gpu. Setting enable_torch_compile to False"

fastvideo/v1/tests/nightly/test_e2e_i2v_overfit_single_sample.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def run_training():
105105
"--num_latent_t", "8",
106106
"--num_gpus", NUM_GPUS_PER_NODE_TRAINING,
107107
"--sp_size", NUM_GPUS_PER_NODE_TRAINING,
108-
"--tp_size", NUM_GPUS_PER_NODE_TRAINING,
108+
"--tp_size", 1,
109109
"--hsdp_replicate_dim", "1",
110110
"--hsdp_shard_dim", NUM_GPUS_PER_NODE_TRAINING,
111111
"--num_gpus", NUM_GPUS_PER_NODE_TRAINING,

fastvideo/v1/tests/ssim/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ FastHunyuan-diffusers: {
2424
"flow_shift": 17,
2525
"seed": 1024,
2626
"sp_size": 2,
27-
"tp_size": 2,
27+
"tp_size": 1,
2828
"vae_sp": true,
2929
"fps": 24
3030
}
@@ -41,7 +41,7 @@ Wan2.1-T2V-1.3B-Diffusers: {
4141
"flow_shift": 7.0,
4242
"seed": 1024,
4343
"sp_size": 2,
44-
"tp_size": 2,
44+
"tp_size": 1,
4545
"vae_sp": True,
4646
"fps": 24,
4747
"neg_prompt": "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards",
@@ -60,7 +60,7 @@ Wan2.1-I2V-14B-480P-Diffusers: {
6060
"flow_shift": 7.0,
6161
"seed": 1024,
6262
"sp_size": 2,
63-
"tp_size": 2,
63+
"tp_size": 1,
6464
"vae_sp": True,
6565
"fps": 24,
6666
"neg_prompt": "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards",

fastvideo/v1/tests/ssim/test_inference_similarity.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
"flow_shift": 17,
3434
"seed": 1024,
3535
"sp_size": 2,
36-
"tp_size": 2,
36+
"tp_size": 1,
3737
"vae_sp": True,
3838
"fps": 24,
3939
}
@@ -50,7 +50,7 @@
5050
"flow_shift": 7.0,
5151
"seed": 1024,
5252
"sp_size": 2,
53-
"tp_size": 2,
53+
"tp_size": 1,
5454
"vae_sp": True,
5555
"fps": 24,
5656
"neg_prompt": "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards",
@@ -69,7 +69,7 @@
6969
"flow_shift": 7.0,
7070
"seed": 1024,
7171
"sp_size": 2,
72-
"tp_size": 2,
72+
"tp_size": 1,
7373
"vae_sp": True,
7474
"fps": 24,
7575
"neg_prompt": "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards",

fastvideo/v1/tests/training/Vanilla/test_training_loss.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def run_worker():
4343
"--num_latent_t", "4",
4444
"--num_gpus", "4",
4545
"--sp_size", "4",
46-
"--tp_size", "4",
46+
"--tp_size", "1",
4747
"--hsdp_replicate_dim", "1",
4848
"--hsdp_shard_dim", "4",
4949
"--train_sp_batch_size", "1",

0 commit comments

Comments
 (0)