# SeedVR2-3B / configs_3b/main.yaml
__object__:
  path: projects.video_diffusion_sr.train
  name: VideoDiffusionTrainer
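# Assumption: `__object__` blocks are resolved by importing `name` from `path`,
# and the ${...} / ${eval:'...'} expressions are OmegaConf-style interpolations
# backed by a custom "eval" resolver. A minimal loading sketch under that
# assumption (not the project's actual entry point):
#
#   import importlib
#   from omegaconf import OmegaConf
#   OmegaConf.register_new_resolver("eval", eval)
#   cfg = OmegaConf.load("configs_3b/main.yaml")
#   obj = cfg["__object__"]
#   trainer_cls = getattr(importlib.import_module(obj.path), obj.name)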
dit:
  model:
    __object__:
      path: models.dit_v2.nadit
      name: NaDiT
      args: as_params
    vid_in_channels: 33
    vid_out_channels: 16
    vid_dim: 2560
    vid_out_norm: fusedrms
    txt_in_dim: 5120
    txt_in_norm: fusedln
    txt_dim: ${.vid_dim}
    emb_dim: ${eval:'6 * ${.vid_dim}'}
    heads: 20
    head_dim: 128  # llm-like
    expand_ratio: 4
    norm: fusedrms
    norm_eps: 1.0e-05
    ada: single
    qk_bias: False
    qk_norm: fusedrms
    patch_size: [ 1, 2, 2 ]
    num_layers: 32  # llm-like
    mm_layers: 10
    mlp_type: swiglu
    msa_type: None
    block_type: ${eval:'${.num_layers} * ["mmdit_sr"]'}  # space-full
    window: ${eval:'${.num_layers} * [(4,3,3)]'}  # space-full
    window_method: ${eval:'${.num_layers} // 2 * ["720pwin_by_size_bysize","720pswin_by_size_bysize"]'}  # space-full
    rope_type: mmrope3d
    rope_dim: 128
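    # With vid_dim = 2560 (= heads * head_dim = 20 * 128), the interpolations
    # above resolve to txt_dim = 2560 and emb_dim = 6 * 2560 = 15360. The three
    # ${eval:...} lists give all 32 layers an "mmdit_sr" block with a (4, 3, 3)
    # attention window, and window_method repeats the pair
    # ["720pwin_by_size_bysize", "720pswin_by_size_bysize"] 16 times, i.e.
    # presumably plain and shifted ("swin") windows interleaved. The 33 input
    # vs. 16 output channels suggest the 16-channel video latent is concatenated
    # with conditioning channels; the exact split is defined in NaDiT itself.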
  compile: False
  gradient_checkpoint: True
  fsdp:
    sharding_strategy: _HYBRID_SHARD_ZERO2
  ema:
    decay: 0.9998
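  # _HYBRID_SHARD_ZERO2 is PyTorch FSDP's hybrid strategy: gradients and
  # optimizer state are sharded ZeRO-2 style within each node while the model is
  # replicated across nodes. ema keeps a shadow copy of the DiT weights,
  # conventionally updated as ema = 0.9998 * ema + (1 - 0.9998) * param.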
vae:
  model:
    __inherit__: models/video_vae_v3/s8_c16_t4_inflation_sd3.yaml
    freeze_encoder: False
    # gradient_checkpoint: True
  slicing:
    split_size: 4
    memory_device: same
    memory_limit:
      conv_max_mem: 0.5
      norm_max_mem: 0.5
  checkpoint: ./ckpts/ema_vae.pth
  scaling_factor: 0.9152
  compile: False
  grouping: False
  dtype: bfloat16
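  # Per the inherited file name, the VAE presumably downsamples 8x spatially and
  # 4x temporally into a 16-channel latent; slicing (split_size: 4) processes the
  # video in chunks to bound memory. scaling_factor is assumed to follow the
  # usual diffusion-VAE convention:
  #   z = vae.encode(x) * 0.9152    # normalize latents before diffusion
  #   x = vae.decode(z / 0.9152)    # un-scale before decoding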
diffusion:
  schedule:
    type: lerp
    T: 1000.0
  sampler:
    type: euler
    prediction_type: v_lerp
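  # The names imply the rectified-flow style linear-interpolation schedule,
  # x_t = (1 - t) * x_0 + t * noise over t in [0, 1] (discretized with T = 1000),
  # with a "v_lerp" velocity target v = noise - x_0 and plain Euler integration
  # at sampling time. This is the conventional reading of lerp/v_lerp; the exact
  # definitions live in the schedule and sampler modules.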
  timesteps:
    training:
      type: logitnormal
      loc: 0.0
      scale: 1.0
    sampling:
      type: uniform_trailing
      steps: 50
      transform: True
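  # Training timesteps are drawn logit-normally, concentrating samples at
  # mid-range noise levels, while inference uses 50 uniformly spaced "trailing"
  # steps. A minimal sketch of the training draw, assuming the standard
  # logit-normal parameterization:
  #   t = torch.sigmoid(loc + scale * torch.randn(batch_size))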
  loss:
    type: v_lerp
  cfg:
    scale: 7.5
    rescale: 0
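  # Classifier-free guidance for sampling, pred = uncond + 7.5 * (cond - uncond);
  # rescale: 0 presumably leaves the guidance-rescale correction (Lin et al.,
  # "Common Diffusion Noise Schedules and Sample Steps Are Flawed") disabled.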
  condition:
    i2v: 0.0
    v2v: 0.0
    sr: 1.0
    noise_scale: 0.25
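  # Presumably per-task conditioning probabilities/weights: only the SR
  # (super-resolution) condition is active, i2v and v2v are off, and noise_scale
  # 0.25 most likely sets the noise augmentation applied to the low-quality
  # conditioning input.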