Instructions to use jxie/autorf-zero_shot-motion_predictor with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use jxie/autorf-zero_shot-motion_predictor with Transformers:
```python
# Load model directly
from transformers import MotionPredictorForRectifiedFlow

model = MotionPredictorForRectifiedFlow.from_pretrained("jxie/autorf-zero_shot-motion_predictor", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "architectures": [ | |
| "MotionPredictorForRectifiedFlow" | |
| ], | |
| "camera_motion_conditioning_drop_prob": 0.1, | |
| "consistency_delta_time": 0.001, | |
| "consistency_loss_weight": 1.0, | |
| "consistency_velocity_match_alpha": 1e-05, | |
| "continuous_predictor": true, | |
| "default_track_rate": 30, | |
| "denoising_predictor_config": { | |
| "attention_bias": true, | |
| "attention_dropout": 0.0, | |
| "head_dim": 64, | |
| "hidden_act": "silu", | |
| "hidden_size": 768, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 2046, | |
| "mlp_bias": true, | |
| "model_type": "spatial_track", | |
| "num_attention_heads": 12, | |
| "num_hidden_layers": 4, | |
| "num_key_value_heads": 12, | |
| "rms_norm_eps": 1e-06, | |
| "track_dimensionality": 2 | |
| }, | |
| "depth_track_multiplier": 1.0, | |
| "encoder_config": null, | |
| "encoder_pooler_type": "none", | |
| "extra_features_size": 3, | |
| "feature_map_upsample_method": "nearest", | |
| "frame_sample_rate": null, | |
| "freeze_global_image_model": true, | |
| "freeze_image_model": true, | |
| "freeze_text_encoder": true, | |
| "global_image_model_name": "openai/clip-vit-base-patch32", | |
| "global_video_encoder_hidden_size": 768, | |
| "height": 256, | |
| "image_model_batch_size": null, | |
| "image_model_name": "facebook/sam2.1-hiera-tiny", | |
| "late_noise_conditioning": true, | |
| "mask_non_visible_tracks": false, | |
| "max_height_shift": null, | |
| "max_track_length": 50, | |
| "max_width_shift": null, | |
| "model_type": "motion_predictor", | |
| "movement_weighting_loss_temperature": null, | |
| "movement_weighting_temperature": 0.5, | |
| "noise_schedule_type": "cos", | |
| "num_global_image_tokens": 49, | |
| "position_encoding_norm_to_feature_norm_ratio": 1.0, | |
| "prepend_query_points": true, | |
| "rectified_flow_ema_beta": 0.9999, | |
| "rectified_flow_ema_update_every": 100, | |
| "rectified_flow_relative_prediction": true, | |
| "rectified_flow_use_consistency": true, | |
| "sam2_image_size": 512, | |
| "text_conditioning_drop_prob": 0.1, | |
| "text_encoder_max_seq_length": 32, | |
| "text_encoder_name": "openai/clip-vit-base-patch32", | |
| "timestep_exponential_decay_loss_factor": null, | |
| "torch_dtype": "float32", | |
| "track_dimensionality": 2, | |
| "track_predictor_config": { | |
| "attention_bias": true, | |
| "attention_dropout": 0.0, | |
| "head_dim": 64, | |
| "hidden_act": "silu", | |
| "hidden_size": 768, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 2046, | |
| "max_position_embeddings": 1024, | |
| "mlp_bias": true, | |
| "model_type": "joint_track", | |
| "num_attention_heads": 12, | |
| "num_hidden_layers": 12, | |
| "num_key_value_heads": 12, | |
| "rms_norm_eps": 1e-06, | |
| "rope_scaling": null, | |
| "rope_theta": 10000.0, | |
| "use_cache": true | |
| }, | |
| "track_predictor_has_spatial_condition": true, | |
| "track_predictor_head_kwargs": { | |
| "concatenate_original_position": true, | |
| "norm_feature": true, | |
| "predictor_type": "linear" | |
| }, | |
| "track_predictor_head_type": "denoising_predictor", | |
| "track_rate_conditioning_drop_prob": 0.0, | |
| "track_rate_conditioning_max_track_rate": 60, | |
| "track_subsample_count": 100, | |
| "transformers_version": "4.50.3", | |
| "use_absolute_positional_embeddings": false, | |
| "use_camera_motion_conditioning": true, | |
| "use_previous_relative_shift": true, | |
| "use_previous_relative_shift_input": false, | |
| "use_track_rate_conditioning": false, | |
| "video_conditioning_drop_prob": 0.0, | |
| "video_encoder_hidden_size": 352, | |
| "visible_min_ratio": 0.5, | |
| "width": 256 | |
| } | |