AnhP committed on Jun 17

Commit

acff07e

verified ·

1 Parent(s): a80535c

Delete audioldm

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

audioldm/README.txt +0 -29
audioldm/audioldm2-large/feature_extractor/preprocessor_config.json +0 -22
audioldm/audioldm2-large/language_model/config.json +0 -39
audioldm/audioldm2-large/language_model/model.safetensors +0 -3
audioldm/audioldm2-large/language_model/pytorch_model.bin +0 -3
audioldm/audioldm2-large/model_index.json +0 -48
audioldm/audioldm2-large/projection_model/config.json +0 -7
audioldm/audioldm2-large/projection_model/diffusion_pytorch_model.bin +0 -3
audioldm/audioldm2-large/projection_model/diffusion_pytorch_model.safetensors +0 -3
audioldm/audioldm2-large/scheduler/scheduler_config.json +0 -19
audioldm/audioldm2-large/text_encoder/config.json +0 -35
audioldm/audioldm2-large/text_encoder/model.safetensors +0 -3
audioldm/audioldm2-large/text_encoder/pytorch_model.bin +0 -3
audioldm/audioldm2-large/text_encoder_2/config.json +0 -32
audioldm/audioldm2-large/text_encoder_2/model.safetensors +0 -3
audioldm/audioldm2-large/text_encoder_2/pytorch_model.bin +0 -3
audioldm/audioldm2-large/tokenizer/merges.txt +0 -0
audioldm/audioldm2-large/tokenizer/special_tokens_map.json +0 -15
audioldm/audioldm2-large/tokenizer/tokenizer.json +0 -0
audioldm/audioldm2-large/tokenizer/tokenizer_config.json +0 -20
audioldm/audioldm2-large/tokenizer/vocab.json +0 -0
audioldm/audioldm2-large/tokenizer_2/special_tokens_map.json +0 -107
audioldm/audioldm2-large/tokenizer_2/spiece.model +0 -3
audioldm/audioldm2-large/tokenizer_2/tokenizer.json +0 -0
audioldm/audioldm2-large/tokenizer_2/tokenizer_config.json +0 -112
audioldm/audioldm2-large/unet/config.json +0 -78
audioldm/audioldm2-large/unet/diffusion_pytorch_model.bin +0 -3
audioldm/audioldm2-large/unet/diffusion_pytorch_model.safetensors +0 -3
audioldm/audioldm2-large/vae/config.json +0 -28
audioldm/audioldm2-large/vae/diffusion_pytorch_model.bin +0 -3
audioldm/audioldm2-large/vae/diffusion_pytorch_model.safetensors +0 -3
audioldm/audioldm2-large/vocoder/config.json +0 -50
audioldm/audioldm2-large/vocoder/model.safetensors +0 -3
audioldm/audioldm2-large/vocoder/pytorch_model.bin +0 -3
audioldm/audioldm2-music/feature_extractor/preprocessor_config.json +0 -22
audioldm/audioldm2-music/language_model/config.json +0 -39
audioldm/audioldm2-music/language_model/model.safetensors +0 -3
audioldm/audioldm2-music/language_model/pytorch_model.bin +0 -3
audioldm/audioldm2-music/model_index.json +0 -48
audioldm/audioldm2-music/projection_model/config.json +0 -7
audioldm/audioldm2-music/projection_model/diffusion_pytorch_model.bin +0 -3
audioldm/audioldm2-music/projection_model/diffusion_pytorch_model.safetensors +0 -3
audioldm/audioldm2-music/scheduler/scheduler_config.json +0 -19
audioldm/audioldm2-music/text_encoder/config.json +0 -35
audioldm/audioldm2-music/text_encoder/model.safetensors +0 -3
audioldm/audioldm2-music/text_encoder/pytorch_model.bin +0 -3
audioldm/audioldm2-music/text_encoder_2/config.json +0 -32
audioldm/audioldm2-music/text_encoder_2/model.safetensors +0 -3
audioldm/audioldm2-music/text_encoder_2/pytorch_model.bin +0 -3
audioldm/audioldm2-music/tokenizer/merges.txt +0 -0

audioldm/README.txt DELETED Viewed

@@ -1,29 +0,0 @@
-Mô hình thuộc sở hữu của https://huggingface.co/cvssp
-Mô hình Audioldm2: https://huggingface.co/cvssp/audioldm2
-Bản Quyền: cc-by-nc-sa-4.0
-Mô hình Audioldm2-large: https://huggingface.co/cvssp/audioldm2-large
-Bản Quyền: cc-by-nc-sa-4.0
-Mô hình Audioldm2-music: https://huggingface.co/cvssp/audioldm2-music
-Bản Quyền: cc-by-nc-sa-4.0
-NẾU SỬ DỤNG CÁC MÔ HÌNH TỪ KHO LƯU TRỮ NÀY VUI LÒNG TUÂN THỦ BẢN QUYỀN TỪ CÁC MÔ HÌNH TRÊN!
-BẠN CHỊU TRÁCH NHIỆM HOÀN TOÀN ĐỐI VỚI BẤT KỲ THIỆT HẠI NÀO PHÁT SINH TỪ VIỆC SỬ DỤNG KHO LƯU TRỮ KHÔNG ĐÚNG CÁCH!
-TÔI SẼ KHÔNG CHỊU TRÁCH NHIỆM VỚI BẤT KỲ THIỆT HẠI TRỰC TIẾP HOẶC GIÁN TIẾP NÀO PHÁT SINH TỪ VIỆC SỬ DỤNG KHO LƯU TRỮ NÀY!
-Model owned by https://huggingface.co/cvssp
-Audioldm2 model: https://huggingface.co/cvssp/audioldm2
-Copyright: cc-by-nc-sa-4.0
-Audioldm2-large model: https://huggingface.co/cvssp/audioldm2-large
-Copyright: cc-by-nc-sa-4.0
-Audioldm2-music model: https://huggingface.co/cvssp/audioldm2-music
-Copyright: cc-by-nc-sa-4.0
-IF YOU USE MODELS FROM THIS ARCHIVE PLEASE COMPLY WITH THE COPYRIGHT FROM THE ABOVE MODELS!
-YOU ARE SOLELY RESPONSIBLE FOR ANY DAMAGES THAT RESULT FROM IMPROPER USE OF THE ARCHIVE!
-I WILL NOT BE LIABLE FOR ANY DIRECT OR INDIRECT DAMAGES THAT RESULT FROM THE USE OF THIS ARCHIVE!

audioldm/audioldm2-large/feature_extractor/preprocessor_config.json DELETED Viewed

@@ -1,22 +0,0 @@
-{
-  "chunk_length_s": 10,
-  "feature_extractor_type": "ClapFeatureExtractor",
-  "feature_size": 64,
-  "fft_window_size": 1024,
-  "frequency_max": 14000,
-  "frequency_min": 50,
-  "hop_length": 480,
-  "max_length_s": 10,
-  "n_fft": 1024,
-  "nb_frequency_bins": 513,
-  "nb_max_frames": 1000,
-  "nb_max_samples": 480000,
-  "padding": "repeatpad",
-  "padding_side": "right",
-  "padding_value": 0.0,
-  "processor_class": "ClapProcessor",
-  "return_attention_mask": false,
-  "sampling_rate": 48000,
-  "top_db": null,
-  "truncation": "rand_trunc"
-}

audioldm/audioldm2-large/language_model/config.json DELETED Viewed

@@ -1,39 +0,0 @@
-{
-  "activation_function": "gelu_new",
-  "architectures": [
-    "GPT2Model"
-  ],
-  "attn_pdrop": 0.1,
-  "bos_token_id": 50256,
-  "embd_pdrop": 0.1,
-  "eos_token_id": 50256,
-  "initializer_range": 0.02,
-  "layer_norm_epsilon": 1e-05,
-  "max_new_tokens": 8,
-  "model_type": "gpt2",
-  "n_ctx": 1024,
-  "n_embd": 768,
-  "n_head": 12,
-  "n_inner": null,
-  "n_layer": 12,
-  "n_positions": 1024,
-  "reorder_and_upcast_attn": false,
-  "resid_pdrop": 0.1,
-  "scale_attn_by_inverse_layer_idx": false,
-  "scale_attn_weights": true,
-  "summary_activation": null,
-  "summary_first_dropout": 0.1,
-  "summary_proj_to_labels": true,
-  "summary_type": "cls_index",
-  "summary_use_proj": true,
-  "task_specific_params": {
-    "text-generation": {
-      "do_sample": true,
-      "max_length": 50
-    }
-  },
-  "torch_dtype": "float32",
-  "transformers_version": "4.32.0.dev0",
-  "use_cache": true,
-  "vocab_size": 50257
-}

audioldm/audioldm2-large/language_model/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:350bb51cf4f23502f239009d12ce0230d166f9d3f5752196505c0a6d841a5514
-size 497772432

audioldm/audioldm2-large/language_model/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ca0febb7670925080c1737a3b36689cea7aefe48c2b45d2321f2ba0bbcbf08fc
-size 497803293

audioldm/audioldm2-large/model_index.json DELETED Viewed

@@ -1,48 +0,0 @@
-{
-  "_class_name": "AudioLDM2Pipeline",
-  "_diffusers_version": "0.20.0.dev0",
-  "feature_extractor": [
-    "transformers",
-    "ClapFeatureExtractor"
-  ],
-  "language_model": [
-    "transformers",
-    "GPT2Model"
-  ],
-  "projection_model": [
-    "audioldm2",
-    "AudioLDM2ProjectionModel"
-  ],
-  "scheduler": [
-    "diffusers",
-    "DDIMScheduler"
-  ],
-  "text_encoder": [
-    "transformers",
-    "ClapModel"
-  ],
-  "text_encoder_2": [
-    "transformers",
-    "T5EncoderModel"
-  ],
-  "tokenizer": [
-    "transformers",
-    "RobertaTokenizerFast"
-  ],
-  "tokenizer_2": [
-    "transformers",
-    "T5TokenizerFast"
-  ],
-  "unet": [
-    "audioldm2",
-    "AudioLDM2UNet2DConditionModel"
-  ],
-  "vae": [
-    "diffusers",
-    "AutoencoderKL"
-  ],
-  "vocoder": [
-    "transformers",
-    "SpeechT5HifiGan"
-  ]
-}

audioldm/audioldm2-large/projection_model/config.json DELETED Viewed

@@ -1,7 +0,0 @@
-{
-  "_class_name": "AudioLDM2ProjectionModel",
-  "_diffusers_version": "0.20.0.dev0",
-  "langauge_model_dim": 768,
-  "text_encoder_1_dim": 1024,
-  "text_encoder_dim": 512
-}

audioldm/audioldm2-large/projection_model/diffusion_pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:abba0622501d8cd9c640a726df5da03c124bf966bba98809af620dc2f2681f5f
-size 4739951

audioldm/audioldm2-large/projection_model/diffusion_pytorch_model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d41d9363c1e9f7c455d1ce9d60d3c888793e295128a1bc722be96b9f5718a4ed
-size 4737688

audioldm/audioldm2-large/scheduler/scheduler_config.json DELETED Viewed

@@ -1,19 +0,0 @@
-{
-  "_class_name": "DDIMScheduler",
-  "_diffusers_version": "0.20.0.dev0",
-  "beta_end": 0.0195,
-  "beta_schedule": "scaled_linear",
-  "beta_start": 0.0015,
-  "clip_sample": false,
-  "clip_sample_range": 1.0,
-  "dynamic_thresholding_ratio": 0.995,
-  "num_train_timesteps": 1000,
-  "prediction_type": "epsilon",
-  "rescale_betas_zero_snr": false,
-  "sample_max_value": 1.0,
-  "set_alpha_to_one": false,
-  "steps_offset": 1,
-  "thresholding": false,
-  "timestep_spacing": "leading",
-  "trained_betas": null
-}

audioldm/audioldm2-large/text_encoder/config.json DELETED Viewed

@@ -1,35 +0,0 @@
-{
-  "architectures": [
-    "ClapModel"
-  ],
-  "audio_config": {
-    "depths": [
-      2,
-      2,
-      12,
-      2
-    ],
-    "fusion_num_hidden_layers": 2,
-    "hidden_size": 1024,
-    "model_type": "clap_audio_model",
-    "patch_embeds_hidden_size": 128,
-    "projection_hidden_size": 768
-  },
-  "hidden_size": 768,
-  "initializer_factor": 1.0,
-  "logit_scale_init_value": 14.285714285714285,
-  "model_type": "clap",
-  "num_hidden_layers": 16,
-  "projection_dim": 512,
-  "projection_hidden_act": "relu",
-  "text_config": {
-    "classifier_dropout": null,
-    "fusion_hidden_size": 768,
-    "fusion_num_hidden_layers": 2,
-    "initializer_range": 0.02,
-    "model_type": "clap_text_model",
-    "projection_hidden_size": 768
-  },
-  "torch_dtype": "float64",
-  "transformers_version": "4.32.0.dev0"
-}

audioldm/audioldm2-large/text_encoder/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a4a47b4a637dd58e9edb7b64a06acf37328b7cc3eafb0b8a85df895cc9e45d09
-size 776327432

audioldm/audioldm2-large/text_encoder/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:637b3ff0f7b212cedafb00739521dc49d8f7953f12bfc1f76ff692f108a41ed0
-size 776444665

audioldm/audioldm2-large/text_encoder_2/config.json DELETED Viewed

@@ -1,32 +0,0 @@
-{
-  "architectures": [
-    "T5EncoderModel"
-  ],
-  "classifier_dropout": 0.0,
-  "d_ff": 2816,
-  "d_kv": 64,
-  "d_model": 1024,
-  "decoder_start_token_id": 0,
-  "dense_act_fn": "gelu_new",
-  "dropout_rate": 0.1,
-  "eos_token_id": 1,
-  "feed_forward_proj": "gated-gelu",
-  "initializer_factor": 1.0,
-  "is_encoder_decoder": true,
-  "is_gated_act": true,
-  "layer_norm_epsilon": 1e-06,
-  "model_type": "t5",
-  "n_positions": 512,
-  "num_decoder_layers": 24,
-  "num_heads": 16,
-  "num_layers": 24,
-  "output_past": true,
-  "pad_token_id": 0,
-  "relative_attention_max_distance": 128,
-  "relative_attention_num_buckets": 32,
-  "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.32.0.dev0",
-  "use_cache": true,
-  "vocab_size": 32128
-}

audioldm/audioldm2-large/text_encoder_2/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c1d0c8f1c739db9343c12ea4b0e3f2c97a833b3c072c251e91d97b7326fefb4e
-size 1364951064

audioldm/audioldm2-large/text_encoder_2/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8c4be8e23954ef72bd0d623206a46b7e1ab7fa23f530b7b9f691d40785273b27
-size 1364996921

audioldm/audioldm2-large/tokenizer/merges.txt DELETED Viewed

The diff for this file is too large to render. See raw diff

audioldm/audioldm2-large/tokenizer/special_tokens_map.json DELETED Viewed

@@ -1,15 +0,0 @@
-{
-  "bos_token": "<s>",
-  "cls_token": "<s>",
-  "eos_token": "</s>",
-  "mask_token": {
-    "content": "<mask>",
-    "lstrip": true,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "<pad>",
-  "sep_token": "</s>",
-  "unk_token": "<unk>"
-}

audioldm/audioldm2-large/tokenizer/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

audioldm/audioldm2-large/tokenizer/tokenizer_config.json DELETED Viewed

@@ -1,20 +0,0 @@
-{
-  "add_prefix_space": false,
-  "bos_token": "<s>",
-  "clean_up_tokenization_spaces": true,
-  "cls_token": "<s>",
-  "eos_token": "</s>",
-  "errors": "replace",
-  "mask_token": "<mask>",
-  "max_length": null,
-  "model_max_length": 512,
-  "pad_to_multiple_of": null,
-  "pad_token": "<pad>",
-  "pad_token_type_id": 0,
-  "padding_side": "right",
-  "processor_class": "ClapProcessor",
-  "sep_token": "</s>",
-  "tokenizer_class": "RobertaTokenizer",
-  "trim_offsets": true,
-  "unk_token": "<unk>"
-}

audioldm/audioldm2-large/tokenizer/vocab.json DELETED Viewed

The diff for this file is too large to render. See raw diff

audioldm/audioldm2-large/tokenizer_2/special_tokens_map.json DELETED Viewed

@@ -1,107 +0,0 @@
-{
-  "additional_special_tokens": [
-    "<extra_id_0>",
-    "<extra_id_1>",
-    "<extra_id_2>",
-    "<extra_id_3>",
-    "<extra_id_4>",
-    "<extra_id_5>",
-    "<extra_id_6>",
-    "<extra_id_7>",
-    "<extra_id_8>",
-    "<extra_id_9>",
-    "<extra_id_10>",
-    "<extra_id_11>",
-    "<extra_id_12>",
-    "<extra_id_13>",
-    "<extra_id_14>",
-    "<extra_id_15>",
-    "<extra_id_16>",
-    "<extra_id_17>",
-    "<extra_id_18>",
-    "<extra_id_19>",
-    "<extra_id_20>",
-    "<extra_id_21>",
-    "<extra_id_22>",
-    "<extra_id_23>",
-    "<extra_id_24>",
-    "<extra_id_25>",
-    "<extra_id_26>",
-    "<extra_id_27>",
-    "<extra_id_28>",
-    "<extra_id_29>",
-    "<extra_id_30>",
-    "<extra_id_31>",
-    "<extra_id_32>",
-    "<extra_id_33>",
-    "<extra_id_34>",
-    "<extra_id_35>",
-    "<extra_id_36>",
-    "<extra_id_37>",
-    "<extra_id_38>",
-    "<extra_id_39>",
-    "<extra_id_40>",
-    "<extra_id_41>",
-    "<extra_id_42>",
-    "<extra_id_43>",
-    "<extra_id_44>",
-    "<extra_id_45>",
-    "<extra_id_46>",
-    "<extra_id_47>",
-    "<extra_id_48>",
-    "<extra_id_49>",
-    "<extra_id_50>",
-    "<extra_id_51>",
-    "<extra_id_52>",
-    "<extra_id_53>",
-    "<extra_id_54>",
-    "<extra_id_55>",
-    "<extra_id_56>",
-    "<extra_id_57>",
-    "<extra_id_58>",
-    "<extra_id_59>",
-    "<extra_id_60>",
-    "<extra_id_61>",
-    "<extra_id_62>",
-    "<extra_id_63>",
-    "<extra_id_64>",
-    "<extra_id_65>",
-    "<extra_id_66>",
-    "<extra_id_67>",
-    "<extra_id_68>",
-    "<extra_id_69>",
-    "<extra_id_70>",
-    "<extra_id_71>",
-    "<extra_id_72>",
-    "<extra_id_73>",
-    "<extra_id_74>",
-    "<extra_id_75>",
-    "<extra_id_76>",
-    "<extra_id_77>",
-    "<extra_id_78>",
-    "<extra_id_79>",
-    "<extra_id_80>",
-    "<extra_id_81>",
-    "<extra_id_82>",
-    "<extra_id_83>",
-    "<extra_id_84>",
-    "<extra_id_85>",
-    "<extra_id_86>",
-    "<extra_id_87>",
-    "<extra_id_88>",
-    "<extra_id_89>",
-    "<extra_id_90>",
-    "<extra_id_91>",
-    "<extra_id_92>",
-    "<extra_id_93>",
-    "<extra_id_94>",
-    "<extra_id_95>",
-    "<extra_id_96>",
-    "<extra_id_97>",
-    "<extra_id_98>",
-    "<extra_id_99>"
-  ],
-  "eos_token": "</s>",
-  "pad_token": "<pad>",
-  "unk_token": "<unk>"
-}

audioldm/audioldm2-large/tokenizer_2/spiece.model DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
-size 791656

audioldm/audioldm2-large/tokenizer_2/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

audioldm/audioldm2-large/tokenizer_2/tokenizer_config.json DELETED Viewed

@@ -1,112 +0,0 @@
-{
-  "additional_special_tokens": [
-    "<extra_id_0>",
-    "<extra_id_1>",
-    "<extra_id_2>",
-    "<extra_id_3>",
-    "<extra_id_4>",
-    "<extra_id_5>",
-    "<extra_id_6>",
-    "<extra_id_7>",
-    "<extra_id_8>",
-    "<extra_id_9>",
-    "<extra_id_10>",
-    "<extra_id_11>",
-    "<extra_id_12>",
-    "<extra_id_13>",
-    "<extra_id_14>",
-    "<extra_id_15>",
-    "<extra_id_16>",
-    "<extra_id_17>",
-    "<extra_id_18>",
-    "<extra_id_19>",
-    "<extra_id_20>",
-    "<extra_id_21>",
-    "<extra_id_22>",
-    "<extra_id_23>",
-    "<extra_id_24>",
-    "<extra_id_25>",
-    "<extra_id_26>",
-    "<extra_id_27>",
-    "<extra_id_28>",
-    "<extra_id_29>",
-    "<extra_id_30>",
-    "<extra_id_31>",
-    "<extra_id_32>",
-    "<extra_id_33>",
-    "<extra_id_34>",
-    "<extra_id_35>",
-    "<extra_id_36>",
-    "<extra_id_37>",
-    "<extra_id_38>",
-    "<extra_id_39>",
-    "<extra_id_40>",
-    "<extra_id_41>",
-    "<extra_id_42>",
-    "<extra_id_43>",
-    "<extra_id_44>",
-    "<extra_id_45>",
-    "<extra_id_46>",
-    "<extra_id_47>",
-    "<extra_id_48>",
-    "<extra_id_49>",
-    "<extra_id_50>",
-    "<extra_id_51>",
-    "<extra_id_52>",
-    "<extra_id_53>",
-    "<extra_id_54>",
-    "<extra_id_55>",
-    "<extra_id_56>",
-    "<extra_id_57>",
-    "<extra_id_58>",
-    "<extra_id_59>",
-    "<extra_id_60>",
-    "<extra_id_61>",
-    "<extra_id_62>",
-    "<extra_id_63>",
-    "<extra_id_64>",
-    "<extra_id_65>",
-    "<extra_id_66>",
-    "<extra_id_67>",
-    "<extra_id_68>",
-    "<extra_id_69>",
-    "<extra_id_70>",
-    "<extra_id_71>",
-    "<extra_id_72>",
-    "<extra_id_73>",
-    "<extra_id_74>",
-    "<extra_id_75>",
-    "<extra_id_76>",
-    "<extra_id_77>",
-    "<extra_id_78>",
-    "<extra_id_79>",
-    "<extra_id_80>",
-    "<extra_id_81>",
-    "<extra_id_82>",
-    "<extra_id_83>",
-    "<extra_id_84>",
-    "<extra_id_85>",
-    "<extra_id_86>",
-    "<extra_id_87>",
-    "<extra_id_88>",
-    "<extra_id_89>",
-    "<extra_id_90>",
-    "<extra_id_91>",
-    "<extra_id_92>",
-    "<extra_id_93>",
-    "<extra_id_94>",
-    "<extra_id_95>",
-    "<extra_id_96>",
-    "<extra_id_97>",
-    "<extra_id_98>",
-    "<extra_id_99>"
-  ],
-  "clean_up_tokenization_spaces": true,
-  "eos_token": "</s>",
-  "extra_ids": 100,
-  "model_max_length": 128,
-  "pad_token": "<pad>",
-  "sp_model_kwargs": {},
-  "tokenizer_class": "T5Tokenizer",
-  "unk_token": "<unk>"
-}

audioldm/audioldm2-large/unet/config.json DELETED Viewed

@@ -1,78 +0,0 @@
-{
-  "_class_name": "AudioLDM2UNet2DConditionModel",
-  "_diffusers_version": "0.20.0.dev0",
-  "act_fn": "silu",
-  "attention_head_dim": 8,
-  "block_out_channels": [
-    128,
-    256,
-    384,
-    640
-  ],
-  "class_embed_type": null,
-  "class_embeddings_concat": false,
-  "conv_in_kernel": 3,
-  "conv_out_kernel": 3,
-  "cross_attention_dim": [
-    [
-      null,
-      768,
-      1024,
-      null
-    ],
-    [
-      null,
-      768,
-      1024,
-      null
-    ],
-    [
-      null,
-      768,
-      1024,
-      null
-    ],
-    [
-      null,
-      768,
-      1024,
-      null
-    ]
-  ],
-  "down_block_types": [
-    "DownBlock2D",
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D"
-  ],
-  "downsample_padding": 1,
-  "flip_sin_to_cos": true,
-  "freq_shift": 0,
-  "in_channels": 8,
-  "layers_per_block": 2,
-  "mid_block_scale_factor": 1,
-  "mid_block_type": "UNetMidBlock2DCrossAttn",
-  "norm_eps": 1e-05,
-  "norm_num_groups": 32,
-  "num_attention_heads": null,
-  "num_class_embeds": null,
-  "only_cross_attention": false,
-  "out_channels": 8,
-  "projection_class_embeddings_input_dim": null,
-  "resnet_time_scale_shift": "default",
-  "sample_size": 256,
-  "time_cond_proj_dim": null,
-  "time_embedding_act_fn": null,
-  "time_embedding_dim": null,
-  "time_embedding_type": "positional",
-  "timestep_post_act": null,
-  "transformer_layers_per_block": 2,
-  "up_block_types": [
-    "CrossAttnUpBlock2D",
-    "CrossAttnUpBlock2D",
-    "CrossAttnUpBlock2D",
-    "UpBlock2D"
-  ],
-  "upcast_attention": false,
-  "use_linear_projection": false
-}

audioldm/audioldm2-large/unet/diffusion_pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0002541e3d5ef789055304622fcf5d9b810dc413724ab1081b6c57d3c319d1ba
-size 2873458601

audioldm/audioldm2-large/unet/diffusion_pytorch_model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c58b1585fc87e9fb31cb16e049100c3626685ab418a4a1f48a370713f0131d13
-size 2872468672

audioldm/audioldm2-large/vae/config.json DELETED Viewed

@@ -1,28 +0,0 @@
-{
-  "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.20.0.dev0",
-  "act_fn": "silu",
-  "block_out_channels": [
-    128,
-    256,
-    512
-  ],
-  "down_block_types": [
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D"
-  ],
-  "force_upcast": true,
-  "in_channels": 1,
-  "latent_channels": 8,
-  "layers_per_block": 2,
-  "norm_num_groups": 32,
-  "out_channels": 1,
-  "sample_size": 1024,
-  "scaling_factor": 0.400870144367218,
-  "up_block_types": [
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D"
-  ]
-}

audioldm/audioldm2-large/vae/diffusion_pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b3494aadd9cf3e3f0cbb4e913f9b35a25da4a3cb709852e204b667ae5890f758
-size 221586761

audioldm/audioldm2-large/vae/diffusion_pytorch_model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5f8ddddc5c45eddaab38a67a434e8a64486964540ba3fc248a0da7cbd599d4ad
-size 221530308

audioldm/audioldm2-large/vocoder/config.json DELETED Viewed

@@ -1,50 +0,0 @@
-{
-  "architectures": [
-    "SpeechT5HifiGan"
-  ],
-  "initializer_range": 0.01,
-  "leaky_relu_slope": 0.1,
-  "model_in_dim": 64,
-  "model_type": "hifigan",
-  "normalize_before": false,
-  "resblock_dilation_sizes": [
-    [
-      1,
-      3,
-      5
-    ],
-    [
-      1,
-      3,
-      5
-    ],
-    [
-      1,
-      3,
-      5
-    ]
-  ],
-  "resblock_kernel_sizes": [
-    3,
-    7,
-    11
-  ],
-  "sampling_rate": 16000,
-  "torch_dtype": "float32",
-  "transformers_version": "4.32.0.dev0",
-  "upsample_initial_channel": 1024,
-  "upsample_kernel_sizes": [
-    16,
-    16,
-    8,
-    4,
-    4
-  ],
-  "upsample_rates": [
-    5,
-    4,
-    2,
-    2,
-    2
-  ]
-}

audioldm/audioldm2-large/vocoder/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d9dc6513c30a5b86c2497712690c04fe74b4aa79fdab6d490b34fcb4e24c590c
-size 221079092

audioldm/audioldm2-large/vocoder/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f9fbefc2b31c85d1dabe98e53d09ac88039af411162a7e641040a9c2b5f62364
-size 221120349

audioldm/audioldm2-music/feature_extractor/preprocessor_config.json DELETED Viewed

@@ -1,22 +0,0 @@
-{
-  "chunk_length_s": 10,
-  "feature_extractor_type": "ClapFeatureExtractor",
-  "feature_size": 64,
-  "fft_window_size": 1024,
-  "frequency_max": 14000,
-  "frequency_min": 50,
-  "hop_length": 480,
-  "max_length_s": 10,
-  "n_fft": 1024,
-  "nb_frequency_bins": 513,
-  "nb_max_frames": 1000,
-  "nb_max_samples": 480000,
-  "padding": "repeatpad",
-  "padding_side": "right",
-  "padding_value": 0.0,
-  "processor_class": "ClapProcessor",
-  "return_attention_mask": false,
-  "sampling_rate": 48000,
-  "top_db": null,
-  "truncation": "rand_trunc"
-}

audioldm/audioldm2-music/language_model/config.json DELETED Viewed

@@ -1,39 +0,0 @@
-{
-  "activation_function": "gelu_new",
-  "architectures": [
-    "GPT2Model"
-  ],
-  "attn_pdrop": 0.1,
-  "bos_token_id": 50256,
-  "embd_pdrop": 0.1,
-  "eos_token_id": 50256,
-  "initializer_range": 0.02,
-  "layer_norm_epsilon": 1e-05,
-  "max_new_tokens": 8,
-  "model_type": "gpt2",
-  "n_ctx": 1024,
-  "n_embd": 768,
-  "n_head": 12,
-  "n_inner": null,
-  "n_layer": 12,
-  "n_positions": 1024,
-  "reorder_and_upcast_attn": false,
-  "resid_pdrop": 0.1,
-  "scale_attn_by_inverse_layer_idx": false,
-  "scale_attn_weights": true,
-  "summary_activation": null,
-  "summary_first_dropout": 0.1,
-  "summary_proj_to_labels": true,
-  "summary_type": "cls_index",
-  "summary_use_proj": true,
-  "task_specific_params": {
-    "text-generation": {
-      "do_sample": true,
-      "max_length": 50
-    }
-  },
-  "torch_dtype": "float32",
-  "transformers_version": "4.32.0.dev0",
-  "use_cache": true,
-  "vocab_size": 50257
-}

audioldm/audioldm2-music/language_model/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8c57787555e1feb378e69e676a9d4f384363150dee6eeef390c47c1a5a99525d
-size 497772432

audioldm/audioldm2-music/language_model/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:be4e5b755437914957edb250243e323fa2b38ab53f097078bea7c2d78aeae507
-size 497803293

audioldm/audioldm2-music/model_index.json DELETED Viewed

@@ -1,48 +0,0 @@
-{
-  "_class_name": "AudioLDM2Pipeline",
-  "_diffusers_version": "0.20.0.dev0",
-  "feature_extractor": [
-    "transformers",
-    "ClapFeatureExtractor"
-  ],
-  "language_model": [
-    "transformers",
-    "GPT2Model"
-  ],
-  "projection_model": [
-    "audioldm2",
-    "AudioLDM2ProjectionModel"
-  ],
-  "scheduler": [
-    "diffusers",
-    "DDIMScheduler"
-  ],
-  "text_encoder": [
-    "transformers",
-    "ClapModel"
-  ],
-  "text_encoder_2": [
-    "transformers",
-    "T5EncoderModel"
-  ],
-  "tokenizer": [
-    "transformers",
-    "RobertaTokenizerFast"
-  ],
-  "tokenizer_2": [
-    "transformers",
-    "T5TokenizerFast"
-  ],
-  "unet": [
-    "audioldm2",
-    "AudioLDM2UNet2DConditionModel"
-  ],
-  "vae": [
-    "diffusers",
-    "AutoencoderKL"
-  ],
-  "vocoder": [
-    "transformers",
-    "SpeechT5HifiGan"
-  ]
-}

audioldm/audioldm2-music/projection_model/config.json DELETED Viewed

@@ -1,7 +0,0 @@
-{
-  "_class_name": "AudioLDM2ProjectionModel",
-  "_diffusers_version": "0.20.0.dev0",
-  "langauge_model_dim": 768,
-  "text_encoder_1_dim": 1024,
-  "text_encoder_dim": 512
-}

audioldm/audioldm2-music/projection_model/diffusion_pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:91e9680e70c34aa7957b8d0cfdce48fe8cd145b509d672b83d3d090863d77946
-size 4739951

audioldm/audioldm2-music/projection_model/diffusion_pytorch_model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2d58536f307aec6d114f8017c0c0e46762a35bce76f418f8ff7cee0c68ebc8f8
-size 4737688

audioldm/audioldm2-music/scheduler/scheduler_config.json DELETED Viewed

@@ -1,19 +0,0 @@
-{
-  "_class_name": "DDIMScheduler",
-  "_diffusers_version": "0.20.0.dev0",
-  "beta_end": 0.0195,
-  "beta_schedule": "scaled_linear",
-  "beta_start": 0.0015,
-  "clip_sample": false,
-  "clip_sample_range": 1.0,
-  "dynamic_thresholding_ratio": 0.995,
-  "num_train_timesteps": 1000,
-  "prediction_type": "epsilon",
-  "rescale_betas_zero_snr": false,
-  "sample_max_value": 1.0,
-  "set_alpha_to_one": false,
-  "steps_offset": 1,
-  "thresholding": false,
-  "timestep_spacing": "leading",
-  "trained_betas": null
-}

audioldm/audioldm2-music/text_encoder/config.json DELETED Viewed

@@ -1,35 +0,0 @@
-{
-  "architectures": [
-    "ClapModel"
-  ],
-  "audio_config": {
-    "depths": [
-      2,
-      2,
-      12,
-      2
-    ],
-    "fusion_num_hidden_layers": 2,
-    "hidden_size": 1024,
-    "model_type": "clap_audio_model",
-    "patch_embeds_hidden_size": 128,
-    "projection_hidden_size": 768
-  },
-  "hidden_size": 768,
-  "initializer_factor": 1.0,
-  "logit_scale_init_value": 14.285714285714285,
-  "model_type": "clap",
-  "num_hidden_layers": 16,
-  "projection_dim": 512,
-  "projection_hidden_act": "relu",
-  "text_config": {
-    "classifier_dropout": null,
-    "fusion_hidden_size": 768,
-    "fusion_num_hidden_layers": 2,
-    "initializer_range": 0.02,
-    "model_type": "clap_text_model",
-    "projection_hidden_size": 768
-  },
-  "torch_dtype": "float64",
-  "transformers_version": "4.32.0.dev0"
-}

audioldm/audioldm2-music/text_encoder/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a4a47b4a637dd58e9edb7b64a06acf37328b7cc3eafb0b8a85df895cc9e45d09
-size 776327432

audioldm/audioldm2-music/text_encoder/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:637b3ff0f7b212cedafb00739521dc49d8f7953f12bfc1f76ff692f108a41ed0
-size 776444665

audioldm/audioldm2-music/text_encoder_2/config.json DELETED Viewed

@@ -1,32 +0,0 @@
-{
-  "architectures": [
-    "T5EncoderModel"
-  ],
-  "classifier_dropout": 0.0,
-  "d_ff": 2816,
-  "d_kv": 64,
-  "d_model": 1024,
-  "decoder_start_token_id": 0,
-  "dense_act_fn": "gelu_new",
-  "dropout_rate": 0.1,
-  "eos_token_id": 1,
-  "feed_forward_proj": "gated-gelu",
-  "initializer_factor": 1.0,
-  "is_encoder_decoder": true,
-  "is_gated_act": true,
-  "layer_norm_epsilon": 1e-06,
-  "model_type": "t5",
-  "n_positions": 512,
-  "num_decoder_layers": 24,
-  "num_heads": 16,
-  "num_layers": 24,
-  "output_past": true,
-  "pad_token_id": 0,
-  "relative_attention_max_distance": 128,
-  "relative_attention_num_buckets": 32,
-  "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.32.0.dev0",
-  "use_cache": true,
-  "vocab_size": 32128
-}

audioldm/audioldm2-music/text_encoder_2/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c1d0c8f1c739db9343c12ea4b0e3f2c97a833b3c072c251e91d97b7326fefb4e
-size 1364951064

audioldm/audioldm2-music/text_encoder_2/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8c4be8e23954ef72bd0d623206a46b7e1ab7fa23f530b7b9f691d40785273b27
-size 1364996921

audioldm/audioldm2-music/tokenizer/merges.txt DELETED Viewed

The diff for this file is too large to render. See raw diff