AnhP committed on
Commit
acff07e
·
verified ·
1 Parent(s): a80535c

Delete audioldm

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. audioldm/README.txt +0 -29
  2. audioldm/audioldm2-large/feature_extractor/preprocessor_config.json +0 -22
  3. audioldm/audioldm2-large/language_model/config.json +0 -39
  4. audioldm/audioldm2-large/language_model/model.safetensors +0 -3
  5. audioldm/audioldm2-large/language_model/pytorch_model.bin +0 -3
  6. audioldm/audioldm2-large/model_index.json +0 -48
  7. audioldm/audioldm2-large/projection_model/config.json +0 -7
  8. audioldm/audioldm2-large/projection_model/diffusion_pytorch_model.bin +0 -3
  9. audioldm/audioldm2-large/projection_model/diffusion_pytorch_model.safetensors +0 -3
  10. audioldm/audioldm2-large/scheduler/scheduler_config.json +0 -19
  11. audioldm/audioldm2-large/text_encoder/config.json +0 -35
  12. audioldm/audioldm2-large/text_encoder/model.safetensors +0 -3
  13. audioldm/audioldm2-large/text_encoder/pytorch_model.bin +0 -3
  14. audioldm/audioldm2-large/text_encoder_2/config.json +0 -32
  15. audioldm/audioldm2-large/text_encoder_2/model.safetensors +0 -3
  16. audioldm/audioldm2-large/text_encoder_2/pytorch_model.bin +0 -3
  17. audioldm/audioldm2-large/tokenizer/merges.txt +0 -0
  18. audioldm/audioldm2-large/tokenizer/special_tokens_map.json +0 -15
  19. audioldm/audioldm2-large/tokenizer/tokenizer.json +0 -0
  20. audioldm/audioldm2-large/tokenizer/tokenizer_config.json +0 -20
  21. audioldm/audioldm2-large/tokenizer/vocab.json +0 -0
  22. audioldm/audioldm2-large/tokenizer_2/special_tokens_map.json +0 -107
  23. audioldm/audioldm2-large/tokenizer_2/spiece.model +0 -3
  24. audioldm/audioldm2-large/tokenizer_2/tokenizer.json +0 -0
  25. audioldm/audioldm2-large/tokenizer_2/tokenizer_config.json +0 -112
  26. audioldm/audioldm2-large/unet/config.json +0 -78
  27. audioldm/audioldm2-large/unet/diffusion_pytorch_model.bin +0 -3
  28. audioldm/audioldm2-large/unet/diffusion_pytorch_model.safetensors +0 -3
  29. audioldm/audioldm2-large/vae/config.json +0 -28
  30. audioldm/audioldm2-large/vae/diffusion_pytorch_model.bin +0 -3
  31. audioldm/audioldm2-large/vae/diffusion_pytorch_model.safetensors +0 -3
  32. audioldm/audioldm2-large/vocoder/config.json +0 -50
  33. audioldm/audioldm2-large/vocoder/model.safetensors +0 -3
  34. audioldm/audioldm2-large/vocoder/pytorch_model.bin +0 -3
  35. audioldm/audioldm2-music/feature_extractor/preprocessor_config.json +0 -22
  36. audioldm/audioldm2-music/language_model/config.json +0 -39
  37. audioldm/audioldm2-music/language_model/model.safetensors +0 -3
  38. audioldm/audioldm2-music/language_model/pytorch_model.bin +0 -3
  39. audioldm/audioldm2-music/model_index.json +0 -48
  40. audioldm/audioldm2-music/projection_model/config.json +0 -7
  41. audioldm/audioldm2-music/projection_model/diffusion_pytorch_model.bin +0 -3
  42. audioldm/audioldm2-music/projection_model/diffusion_pytorch_model.safetensors +0 -3
  43. audioldm/audioldm2-music/scheduler/scheduler_config.json +0 -19
  44. audioldm/audioldm2-music/text_encoder/config.json +0 -35
  45. audioldm/audioldm2-music/text_encoder/model.safetensors +0 -3
  46. audioldm/audioldm2-music/text_encoder/pytorch_model.bin +0 -3
  47. audioldm/audioldm2-music/text_encoder_2/config.json +0 -32
  48. audioldm/audioldm2-music/text_encoder_2/model.safetensors +0 -3
  49. audioldm/audioldm2-music/text_encoder_2/pytorch_model.bin +0 -3
  50. audioldm/audioldm2-music/tokenizer/merges.txt +0 -0
audioldm/README.txt DELETED
@@ -1,29 +0,0 @@
1
- Mô hình thuộc sở hữu của https://huggingface.co/cvssp
2
-
3
- Mô hình Audioldm2: https://huggingface.co/cvssp/audioldm2
4
- Bản Quyền: cc-by-nc-sa-4.0
5
-
6
- Mô hình Audioldm2-large: https://huggingface.co/cvssp/audioldm2-large
7
- Bản Quyền: cc-by-nc-sa-4.0
8
-
9
- Mô hình Audioldm2-music: https://huggingface.co/cvssp/audioldm2-music
10
- Bản Quyền: cc-by-nc-sa-4.0
11
-
12
- NẾU SỬ DỤNG CÁC MÔ HÌNH TỪ KHO LƯU TRỮ NÀY VUI LÒNG TUÂN THỦ BẢN QUYỀN TỪ CÁC MÔ HÌNH TRÊN!
13
- BẠN CHỊU TRÁCH NHIỆM HOÀN TOÀN ĐỐI VỚI BẤT KỲ THIỆT HẠI NÀO PHÁT SINH TỪ VIỆC SỬ DỤNG KHO LƯU TRỮ KHÔNG ĐÚNG CÁCH!
14
- TÔI SẼ KHÔNG CHỊU TRÁCH NHIỆM VỚI BẤT KỲ THIỆT HẠI TRỰC TIẾP HOẶC GIÁN TIẾP NÀO PHÁT SINH TỪ VIỆC SỬ DỤNG KHO LƯU TRỮ NÀY!
15
-
16
- The models are owned by https://huggingface.co/cvssp
17
-
18
- Audioldm2 model: https://huggingface.co/cvssp/audioldm2
19
- Copyright: cc-by-nc-sa-4.0
20
-
21
- Audioldm2-large model: https://huggingface.co/cvssp/audioldm2-large
22
- Copyright: cc-by-nc-sa-4.0
23
-
24
- Audioldm2-music model: https://huggingface.co/cvssp/audioldm2-music
25
- Copyright: cc-by-nc-sa-4.0
26
-
27
- IF YOU USE MODELS FROM THIS ARCHIVE, PLEASE COMPLY WITH THE LICENSE TERMS OF THE ABOVE MODELS!
28
- YOU ARE SOLELY RESPONSIBLE FOR ANY DAMAGES THAT RESULT FROM IMPROPER USE OF THE ARCHIVE!
29
- I WILL NOT BE LIABLE FOR ANY DIRECT OR INDIRECT DAMAGES THAT RESULT FROM THE USE OF THIS ARCHIVE!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/feature_extractor/preprocessor_config.json DELETED
@@ -1,22 +0,0 @@
1
- {
2
- "chunk_length_s": 10,
3
- "feature_extractor_type": "ClapFeatureExtractor",
4
- "feature_size": 64,
5
- "fft_window_size": 1024,
6
- "frequency_max": 14000,
7
- "frequency_min": 50,
8
- "hop_length": 480,
9
- "max_length_s": 10,
10
- "n_fft": 1024,
11
- "nb_frequency_bins": 513,
12
- "nb_max_frames": 1000,
13
- "nb_max_samples": 480000,
14
- "padding": "repeatpad",
15
- "padding_side": "right",
16
- "padding_value": 0.0,
17
- "processor_class": "ClapProcessor",
18
- "return_attention_mask": false,
19
- "sampling_rate": 48000,
20
- "top_db": null,
21
- "truncation": "rand_trunc"
22
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/language_model/config.json DELETED
@@ -1,39 +0,0 @@
1
- {
2
- "activation_function": "gelu_new",
3
- "architectures": [
4
- "GPT2Model"
5
- ],
6
- "attn_pdrop": 0.1,
7
- "bos_token_id": 50256,
8
- "embd_pdrop": 0.1,
9
- "eos_token_id": 50256,
10
- "initializer_range": 0.02,
11
- "layer_norm_epsilon": 1e-05,
12
- "max_new_tokens": 8,
13
- "model_type": "gpt2",
14
- "n_ctx": 1024,
15
- "n_embd": 768,
16
- "n_head": 12,
17
- "n_inner": null,
18
- "n_layer": 12,
19
- "n_positions": 1024,
20
- "reorder_and_upcast_attn": false,
21
- "resid_pdrop": 0.1,
22
- "scale_attn_by_inverse_layer_idx": false,
23
- "scale_attn_weights": true,
24
- "summary_activation": null,
25
- "summary_first_dropout": 0.1,
26
- "summary_proj_to_labels": true,
27
- "summary_type": "cls_index",
28
- "summary_use_proj": true,
29
- "task_specific_params": {
30
- "text-generation": {
31
- "do_sample": true,
32
- "max_length": 50
33
- }
34
- },
35
- "torch_dtype": "float32",
36
- "transformers_version": "4.32.0.dev0",
37
- "use_cache": true,
38
- "vocab_size": 50257
39
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/language_model/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:350bb51cf4f23502f239009d12ce0230d166f9d3f5752196505c0a6d841a5514
3
- size 497772432
 
 
 
 
audioldm/audioldm2-large/language_model/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca0febb7670925080c1737a3b36689cea7aefe48c2b45d2321f2ba0bbcbf08fc
3
- size 497803293
 
 
 
 
audioldm/audioldm2-large/model_index.json DELETED
@@ -1,48 +0,0 @@
1
- {
2
- "_class_name": "AudioLDM2Pipeline",
3
- "_diffusers_version": "0.20.0.dev0",
4
- "feature_extractor": [
5
- "transformers",
6
- "ClapFeatureExtractor"
7
- ],
8
- "language_model": [
9
- "transformers",
10
- "GPT2Model"
11
- ],
12
- "projection_model": [
13
- "audioldm2",
14
- "AudioLDM2ProjectionModel"
15
- ],
16
- "scheduler": [
17
- "diffusers",
18
- "DDIMScheduler"
19
- ],
20
- "text_encoder": [
21
- "transformers",
22
- "ClapModel"
23
- ],
24
- "text_encoder_2": [
25
- "transformers",
26
- "T5EncoderModel"
27
- ],
28
- "tokenizer": [
29
- "transformers",
30
- "RobertaTokenizerFast"
31
- ],
32
- "tokenizer_2": [
33
- "transformers",
34
- "T5TokenizerFast"
35
- ],
36
- "unet": [
37
- "audioldm2",
38
- "AudioLDM2UNet2DConditionModel"
39
- ],
40
- "vae": [
41
- "diffusers",
42
- "AutoencoderKL"
43
- ],
44
- "vocoder": [
45
- "transformers",
46
- "SpeechT5HifiGan"
47
- ]
48
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/projection_model/config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_class_name": "AudioLDM2ProjectionModel",
3
- "_diffusers_version": "0.20.0.dev0",
4
- "langauge_model_dim": 768,
5
- "text_encoder_1_dim": 1024,
6
- "text_encoder_dim": 512
7
- }
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/projection_model/diffusion_pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:abba0622501d8cd9c640a726df5da03c124bf966bba98809af620dc2f2681f5f
3
- size 4739951
 
 
 
 
audioldm/audioldm2-large/projection_model/diffusion_pytorch_model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d41d9363c1e9f7c455d1ce9d60d3c888793e295128a1bc722be96b9f5718a4ed
3
- size 4737688
 
 
 
 
audioldm/audioldm2-large/scheduler/scheduler_config.json DELETED
@@ -1,19 +0,0 @@
1
- {
2
- "_class_name": "DDIMScheduler",
3
- "_diffusers_version": "0.20.0.dev0",
4
- "beta_end": 0.0195,
5
- "beta_schedule": "scaled_linear",
6
- "beta_start": 0.0015,
7
- "clip_sample": false,
8
- "clip_sample_range": 1.0,
9
- "dynamic_thresholding_ratio": 0.995,
10
- "num_train_timesteps": 1000,
11
- "prediction_type": "epsilon",
12
- "rescale_betas_zero_snr": false,
13
- "sample_max_value": 1.0,
14
- "set_alpha_to_one": false,
15
- "steps_offset": 1,
16
- "thresholding": false,
17
- "timestep_spacing": "leading",
18
- "trained_betas": null
19
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/text_encoder/config.json DELETED
@@ -1,35 +0,0 @@
1
- {
2
- "architectures": [
3
- "ClapModel"
4
- ],
5
- "audio_config": {
6
- "depths": [
7
- 2,
8
- 2,
9
- 12,
10
- 2
11
- ],
12
- "fusion_num_hidden_layers": 2,
13
- "hidden_size": 1024,
14
- "model_type": "clap_audio_model",
15
- "patch_embeds_hidden_size": 128,
16
- "projection_hidden_size": 768
17
- },
18
- "hidden_size": 768,
19
- "initializer_factor": 1.0,
20
- "logit_scale_init_value": 14.285714285714285,
21
- "model_type": "clap",
22
- "num_hidden_layers": 16,
23
- "projection_dim": 512,
24
- "projection_hidden_act": "relu",
25
- "text_config": {
26
- "classifier_dropout": null,
27
- "fusion_hidden_size": 768,
28
- "fusion_num_hidden_layers": 2,
29
- "initializer_range": 0.02,
30
- "model_type": "clap_text_model",
31
- "projection_hidden_size": 768
32
- },
33
- "torch_dtype": "float64",
34
- "transformers_version": "4.32.0.dev0"
35
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/text_encoder/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4a47b4a637dd58e9edb7b64a06acf37328b7cc3eafb0b8a85df895cc9e45d09
3
- size 776327432
 
 
 
 
audioldm/audioldm2-large/text_encoder/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:637b3ff0f7b212cedafb00739521dc49d8f7953f12bfc1f76ff692f108a41ed0
3
- size 776444665
 
 
 
 
audioldm/audioldm2-large/text_encoder_2/config.json DELETED
@@ -1,32 +0,0 @@
1
- {
2
- "architectures": [
3
- "T5EncoderModel"
4
- ],
5
- "classifier_dropout": 0.0,
6
- "d_ff": 2816,
7
- "d_kv": 64,
8
- "d_model": 1024,
9
- "decoder_start_token_id": 0,
10
- "dense_act_fn": "gelu_new",
11
- "dropout_rate": 0.1,
12
- "eos_token_id": 1,
13
- "feed_forward_proj": "gated-gelu",
14
- "initializer_factor": 1.0,
15
- "is_encoder_decoder": true,
16
- "is_gated_act": true,
17
- "layer_norm_epsilon": 1e-06,
18
- "model_type": "t5",
19
- "n_positions": 512,
20
- "num_decoder_layers": 24,
21
- "num_heads": 16,
22
- "num_layers": 24,
23
- "output_past": true,
24
- "pad_token_id": 0,
25
- "relative_attention_max_distance": 128,
26
- "relative_attention_num_buckets": 32,
27
- "tie_word_embeddings": false,
28
- "torch_dtype": "float32",
29
- "transformers_version": "4.32.0.dev0",
30
- "use_cache": true,
31
- "vocab_size": 32128
32
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/text_encoder_2/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1d0c8f1c739db9343c12ea4b0e3f2c97a833b3c072c251e91d97b7326fefb4e
3
- size 1364951064
 
 
 
 
audioldm/audioldm2-large/text_encoder_2/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c4be8e23954ef72bd0d623206a46b7e1ab7fa23f530b7b9f691d40785273b27
3
- size 1364996921
 
 
 
 
audioldm/audioldm2-large/tokenizer/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
audioldm/audioldm2-large/tokenizer/special_tokens_map.json DELETED
@@ -1,15 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
5
- "mask_token": {
6
- "content": "<mask>",
7
- "lstrip": true,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false
11
- },
12
- "pad_token": "<pad>",
13
- "sep_token": "</s>",
14
- "unk_token": "<unk>"
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/tokenizer/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
audioldm/audioldm2-large/tokenizer/tokenizer_config.json DELETED
@@ -1,20 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "bos_token": "<s>",
4
- "clean_up_tokenization_spaces": true,
5
- "cls_token": "<s>",
6
- "eos_token": "</s>",
7
- "errors": "replace",
8
- "mask_token": "<mask>",
9
- "max_length": null,
10
- "model_max_length": 512,
11
- "pad_to_multiple_of": null,
12
- "pad_token": "<pad>",
13
- "pad_token_type_id": 0,
14
- "padding_side": "right",
15
- "processor_class": "ClapProcessor",
16
- "sep_token": "</s>",
17
- "tokenizer_class": "RobertaTokenizer",
18
- "trim_offsets": true,
19
- "unk_token": "<unk>"
20
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/tokenizer/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
audioldm/audioldm2-large/tokenizer_2/special_tokens_map.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<extra_id_0>",
4
- "<extra_id_1>",
5
- "<extra_id_2>",
6
- "<extra_id_3>",
7
- "<extra_id_4>",
8
- "<extra_id_5>",
9
- "<extra_id_6>",
10
- "<extra_id_7>",
11
- "<extra_id_8>",
12
- "<extra_id_9>",
13
- "<extra_id_10>",
14
- "<extra_id_11>",
15
- "<extra_id_12>",
16
- "<extra_id_13>",
17
- "<extra_id_14>",
18
- "<extra_id_15>",
19
- "<extra_id_16>",
20
- "<extra_id_17>",
21
- "<extra_id_18>",
22
- "<extra_id_19>",
23
- "<extra_id_20>",
24
- "<extra_id_21>",
25
- "<extra_id_22>",
26
- "<extra_id_23>",
27
- "<extra_id_24>",
28
- "<extra_id_25>",
29
- "<extra_id_26>",
30
- "<extra_id_27>",
31
- "<extra_id_28>",
32
- "<extra_id_29>",
33
- "<extra_id_30>",
34
- "<extra_id_31>",
35
- "<extra_id_32>",
36
- "<extra_id_33>",
37
- "<extra_id_34>",
38
- "<extra_id_35>",
39
- "<extra_id_36>",
40
- "<extra_id_37>",
41
- "<extra_id_38>",
42
- "<extra_id_39>",
43
- "<extra_id_40>",
44
- "<extra_id_41>",
45
- "<extra_id_42>",
46
- "<extra_id_43>",
47
- "<extra_id_44>",
48
- "<extra_id_45>",
49
- "<extra_id_46>",
50
- "<extra_id_47>",
51
- "<extra_id_48>",
52
- "<extra_id_49>",
53
- "<extra_id_50>",
54
- "<extra_id_51>",
55
- "<extra_id_52>",
56
- "<extra_id_53>",
57
- "<extra_id_54>",
58
- "<extra_id_55>",
59
- "<extra_id_56>",
60
- "<extra_id_57>",
61
- "<extra_id_58>",
62
- "<extra_id_59>",
63
- "<extra_id_60>",
64
- "<extra_id_61>",
65
- "<extra_id_62>",
66
- "<extra_id_63>",
67
- "<extra_id_64>",
68
- "<extra_id_65>",
69
- "<extra_id_66>",
70
- "<extra_id_67>",
71
- "<extra_id_68>",
72
- "<extra_id_69>",
73
- "<extra_id_70>",
74
- "<extra_id_71>",
75
- "<extra_id_72>",
76
- "<extra_id_73>",
77
- "<extra_id_74>",
78
- "<extra_id_75>",
79
- "<extra_id_76>",
80
- "<extra_id_77>",
81
- "<extra_id_78>",
82
- "<extra_id_79>",
83
- "<extra_id_80>",
84
- "<extra_id_81>",
85
- "<extra_id_82>",
86
- "<extra_id_83>",
87
- "<extra_id_84>",
88
- "<extra_id_85>",
89
- "<extra_id_86>",
90
- "<extra_id_87>",
91
- "<extra_id_88>",
92
- "<extra_id_89>",
93
- "<extra_id_90>",
94
- "<extra_id_91>",
95
- "<extra_id_92>",
96
- "<extra_id_93>",
97
- "<extra_id_94>",
98
- "<extra_id_95>",
99
- "<extra_id_96>",
100
- "<extra_id_97>",
101
- "<extra_id_98>",
102
- "<extra_id_99>"
103
- ],
104
- "eos_token": "</s>",
105
- "pad_token": "<pad>",
106
- "unk_token": "<unk>"
107
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/tokenizer_2/spiece.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
- size 791656
 
 
 
 
audioldm/audioldm2-large/tokenizer_2/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
audioldm/audioldm2-large/tokenizer_2/tokenizer_config.json DELETED
@@ -1,112 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<extra_id_0>",
4
- "<extra_id_1>",
5
- "<extra_id_2>",
6
- "<extra_id_3>",
7
- "<extra_id_4>",
8
- "<extra_id_5>",
9
- "<extra_id_6>",
10
- "<extra_id_7>",
11
- "<extra_id_8>",
12
- "<extra_id_9>",
13
- "<extra_id_10>",
14
- "<extra_id_11>",
15
- "<extra_id_12>",
16
- "<extra_id_13>",
17
- "<extra_id_14>",
18
- "<extra_id_15>",
19
- "<extra_id_16>",
20
- "<extra_id_17>",
21
- "<extra_id_18>",
22
- "<extra_id_19>",
23
- "<extra_id_20>",
24
- "<extra_id_21>",
25
- "<extra_id_22>",
26
- "<extra_id_23>",
27
- "<extra_id_24>",
28
- "<extra_id_25>",
29
- "<extra_id_26>",
30
- "<extra_id_27>",
31
- "<extra_id_28>",
32
- "<extra_id_29>",
33
- "<extra_id_30>",
34
- "<extra_id_31>",
35
- "<extra_id_32>",
36
- "<extra_id_33>",
37
- "<extra_id_34>",
38
- "<extra_id_35>",
39
- "<extra_id_36>",
40
- "<extra_id_37>",
41
- "<extra_id_38>",
42
- "<extra_id_39>",
43
- "<extra_id_40>",
44
- "<extra_id_41>",
45
- "<extra_id_42>",
46
- "<extra_id_43>",
47
- "<extra_id_44>",
48
- "<extra_id_45>",
49
- "<extra_id_46>",
50
- "<extra_id_47>",
51
- "<extra_id_48>",
52
- "<extra_id_49>",
53
- "<extra_id_50>",
54
- "<extra_id_51>",
55
- "<extra_id_52>",
56
- "<extra_id_53>",
57
- "<extra_id_54>",
58
- "<extra_id_55>",
59
- "<extra_id_56>",
60
- "<extra_id_57>",
61
- "<extra_id_58>",
62
- "<extra_id_59>",
63
- "<extra_id_60>",
64
- "<extra_id_61>",
65
- "<extra_id_62>",
66
- "<extra_id_63>",
67
- "<extra_id_64>",
68
- "<extra_id_65>",
69
- "<extra_id_66>",
70
- "<extra_id_67>",
71
- "<extra_id_68>",
72
- "<extra_id_69>",
73
- "<extra_id_70>",
74
- "<extra_id_71>",
75
- "<extra_id_72>",
76
- "<extra_id_73>",
77
- "<extra_id_74>",
78
- "<extra_id_75>",
79
- "<extra_id_76>",
80
- "<extra_id_77>",
81
- "<extra_id_78>",
82
- "<extra_id_79>",
83
- "<extra_id_80>",
84
- "<extra_id_81>",
85
- "<extra_id_82>",
86
- "<extra_id_83>",
87
- "<extra_id_84>",
88
- "<extra_id_85>",
89
- "<extra_id_86>",
90
- "<extra_id_87>",
91
- "<extra_id_88>",
92
- "<extra_id_89>",
93
- "<extra_id_90>",
94
- "<extra_id_91>",
95
- "<extra_id_92>",
96
- "<extra_id_93>",
97
- "<extra_id_94>",
98
- "<extra_id_95>",
99
- "<extra_id_96>",
100
- "<extra_id_97>",
101
- "<extra_id_98>",
102
- "<extra_id_99>"
103
- ],
104
- "clean_up_tokenization_spaces": true,
105
- "eos_token": "</s>",
106
- "extra_ids": 100,
107
- "model_max_length": 128,
108
- "pad_token": "<pad>",
109
- "sp_model_kwargs": {},
110
- "tokenizer_class": "T5Tokenizer",
111
- "unk_token": "<unk>"
112
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/unet/config.json DELETED
@@ -1,78 +0,0 @@
1
- {
2
- "_class_name": "AudioLDM2UNet2DConditionModel",
3
- "_diffusers_version": "0.20.0.dev0",
4
- "act_fn": "silu",
5
- "attention_head_dim": 8,
6
- "block_out_channels": [
7
- 128,
8
- 256,
9
- 384,
10
- 640
11
- ],
12
- "class_embed_type": null,
13
- "class_embeddings_concat": false,
14
- "conv_in_kernel": 3,
15
- "conv_out_kernel": 3,
16
- "cross_attention_dim": [
17
- [
18
- null,
19
- 768,
20
- 1024,
21
- null
22
- ],
23
- [
24
- null,
25
- 768,
26
- 1024,
27
- null
28
- ],
29
- [
30
- null,
31
- 768,
32
- 1024,
33
- null
34
- ],
35
- [
36
- null,
37
- 768,
38
- 1024,
39
- null
40
- ]
41
- ],
42
- "down_block_types": [
43
- "DownBlock2D",
44
- "CrossAttnDownBlock2D",
45
- "CrossAttnDownBlock2D",
46
- "CrossAttnDownBlock2D"
47
- ],
48
- "downsample_padding": 1,
49
- "flip_sin_to_cos": true,
50
- "freq_shift": 0,
51
- "in_channels": 8,
52
- "layers_per_block": 2,
53
- "mid_block_scale_factor": 1,
54
- "mid_block_type": "UNetMidBlock2DCrossAttn",
55
- "norm_eps": 1e-05,
56
- "norm_num_groups": 32,
57
- "num_attention_heads": null,
58
- "num_class_embeds": null,
59
- "only_cross_attention": false,
60
- "out_channels": 8,
61
- "projection_class_embeddings_input_dim": null,
62
- "resnet_time_scale_shift": "default",
63
- "sample_size": 256,
64
- "time_cond_proj_dim": null,
65
- "time_embedding_act_fn": null,
66
- "time_embedding_dim": null,
67
- "time_embedding_type": "positional",
68
- "timestep_post_act": null,
69
- "transformer_layers_per_block": 2,
70
- "up_block_types": [
71
- "CrossAttnUpBlock2D",
72
- "CrossAttnUpBlock2D",
73
- "CrossAttnUpBlock2D",
74
- "UpBlock2D"
75
- ],
76
- "upcast_attention": false,
77
- "use_linear_projection": false
78
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/unet/diffusion_pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0002541e3d5ef789055304622fcf5d9b810dc413724ab1081b6c57d3c319d1ba
3
- size 2873458601
 
 
 
 
audioldm/audioldm2-large/unet/diffusion_pytorch_model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c58b1585fc87e9fb31cb16e049100c3626685ab418a4a1f48a370713f0131d13
3
- size 2872468672
 
 
 
 
audioldm/audioldm2-large/vae/config.json DELETED
@@ -1,28 +0,0 @@
1
- {
2
- "_class_name": "AutoencoderKL",
3
- "_diffusers_version": "0.20.0.dev0",
4
- "act_fn": "silu",
5
- "block_out_channels": [
6
- 128,
7
- 256,
8
- 512
9
- ],
10
- "down_block_types": [
11
- "DownEncoderBlock2D",
12
- "DownEncoderBlock2D",
13
- "DownEncoderBlock2D"
14
- ],
15
- "force_upcast": true,
16
- "in_channels": 1,
17
- "latent_channels": 8,
18
- "layers_per_block": 2,
19
- "norm_num_groups": 32,
20
- "out_channels": 1,
21
- "sample_size": 1024,
22
- "scaling_factor": 0.400870144367218,
23
- "up_block_types": [
24
- "UpDecoderBlock2D",
25
- "UpDecoderBlock2D",
26
- "UpDecoderBlock2D"
27
- ]
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/vae/diffusion_pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3494aadd9cf3e3f0cbb4e913f9b35a25da4a3cb709852e204b667ae5890f758
3
- size 221586761
 
 
 
 
audioldm/audioldm2-large/vae/diffusion_pytorch_model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f8ddddc5c45eddaab38a67a434e8a64486964540ba3fc248a0da7cbd599d4ad
3
- size 221530308
 
 
 
 
audioldm/audioldm2-large/vocoder/config.json DELETED
@@ -1,50 +0,0 @@
1
- {
2
- "architectures": [
3
- "SpeechT5HifiGan"
4
- ],
5
- "initializer_range": 0.01,
6
- "leaky_relu_slope": 0.1,
7
- "model_in_dim": 64,
8
- "model_type": "hifigan",
9
- "normalize_before": false,
10
- "resblock_dilation_sizes": [
11
- [
12
- 1,
13
- 3,
14
- 5
15
- ],
16
- [
17
- 1,
18
- 3,
19
- 5
20
- ],
21
- [
22
- 1,
23
- 3,
24
- 5
25
- ]
26
- ],
27
- "resblock_kernel_sizes": [
28
- 3,
29
- 7,
30
- 11
31
- ],
32
- "sampling_rate": 16000,
33
- "torch_dtype": "float32",
34
- "transformers_version": "4.32.0.dev0",
35
- "upsample_initial_channel": 1024,
36
- "upsample_kernel_sizes": [
37
- 16,
38
- 16,
39
- 8,
40
- 4,
41
- 4
42
- ],
43
- "upsample_rates": [
44
- 5,
45
- 4,
46
- 2,
47
- 2,
48
- 2
49
- ]
50
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-large/vocoder/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9dc6513c30a5b86c2497712690c04fe74b4aa79fdab6d490b34fcb4e24c590c
3
- size 221079092
 
 
 
 
audioldm/audioldm2-large/vocoder/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9fbefc2b31c85d1dabe98e53d09ac88039af411162a7e641040a9c2b5f62364
3
- size 221120349
 
 
 
 
audioldm/audioldm2-music/feature_extractor/preprocessor_config.json DELETED
@@ -1,22 +0,0 @@
1
- {
2
- "chunk_length_s": 10,
3
- "feature_extractor_type": "ClapFeatureExtractor",
4
- "feature_size": 64,
5
- "fft_window_size": 1024,
6
- "frequency_max": 14000,
7
- "frequency_min": 50,
8
- "hop_length": 480,
9
- "max_length_s": 10,
10
- "n_fft": 1024,
11
- "nb_frequency_bins": 513,
12
- "nb_max_frames": 1000,
13
- "nb_max_samples": 480000,
14
- "padding": "repeatpad",
15
- "padding_side": "right",
16
- "padding_value": 0.0,
17
- "processor_class": "ClapProcessor",
18
- "return_attention_mask": false,
19
- "sampling_rate": 48000,
20
- "top_db": null,
21
- "truncation": "rand_trunc"
22
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-music/language_model/config.json DELETED
@@ -1,39 +0,0 @@
1
- {
2
- "activation_function": "gelu_new",
3
- "architectures": [
4
- "GPT2Model"
5
- ],
6
- "attn_pdrop": 0.1,
7
- "bos_token_id": 50256,
8
- "embd_pdrop": 0.1,
9
- "eos_token_id": 50256,
10
- "initializer_range": 0.02,
11
- "layer_norm_epsilon": 1e-05,
12
- "max_new_tokens": 8,
13
- "model_type": "gpt2",
14
- "n_ctx": 1024,
15
- "n_embd": 768,
16
- "n_head": 12,
17
- "n_inner": null,
18
- "n_layer": 12,
19
- "n_positions": 1024,
20
- "reorder_and_upcast_attn": false,
21
- "resid_pdrop": 0.1,
22
- "scale_attn_by_inverse_layer_idx": false,
23
- "scale_attn_weights": true,
24
- "summary_activation": null,
25
- "summary_first_dropout": 0.1,
26
- "summary_proj_to_labels": true,
27
- "summary_type": "cls_index",
28
- "summary_use_proj": true,
29
- "task_specific_params": {
30
- "text-generation": {
31
- "do_sample": true,
32
- "max_length": 50
33
- }
34
- },
35
- "torch_dtype": "float32",
36
- "transformers_version": "4.32.0.dev0",
37
- "use_cache": true,
38
- "vocab_size": 50257
39
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-music/language_model/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c57787555e1feb378e69e676a9d4f384363150dee6eeef390c47c1a5a99525d
3
- size 497772432
 
 
 
 
audioldm/audioldm2-music/language_model/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:be4e5b755437914957edb250243e323fa2b38ab53f097078bea7c2d78aeae507
3
- size 497803293
 
 
 
 
audioldm/audioldm2-music/model_index.json DELETED
@@ -1,48 +0,0 @@
1
- {
2
- "_class_name": "AudioLDM2Pipeline",
3
- "_diffusers_version": "0.20.0.dev0",
4
- "feature_extractor": [
5
- "transformers",
6
- "ClapFeatureExtractor"
7
- ],
8
- "language_model": [
9
- "transformers",
10
- "GPT2Model"
11
- ],
12
- "projection_model": [
13
- "audioldm2",
14
- "AudioLDM2ProjectionModel"
15
- ],
16
- "scheduler": [
17
- "diffusers",
18
- "DDIMScheduler"
19
- ],
20
- "text_encoder": [
21
- "transformers",
22
- "ClapModel"
23
- ],
24
- "text_encoder_2": [
25
- "transformers",
26
- "T5EncoderModel"
27
- ],
28
- "tokenizer": [
29
- "transformers",
30
- "RobertaTokenizerFast"
31
- ],
32
- "tokenizer_2": [
33
- "transformers",
34
- "T5TokenizerFast"
35
- ],
36
- "unet": [
37
- "audioldm2",
38
- "AudioLDM2UNet2DConditionModel"
39
- ],
40
- "vae": [
41
- "diffusers",
42
- "AutoencoderKL"
43
- ],
44
- "vocoder": [
45
- "transformers",
46
- "SpeechT5HifiGan"
47
- ]
48
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-music/projection_model/config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_class_name": "AudioLDM2ProjectionModel",
3
- "_diffusers_version": "0.20.0.dev0",
4
- "langauge_model_dim": 768,
5
- "text_encoder_1_dim": 1024,
6
- "text_encoder_dim": 512
7
- }
 
 
 
 
 
 
 
 
audioldm/audioldm2-music/projection_model/diffusion_pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:91e9680e70c34aa7957b8d0cfdce48fe8cd145b509d672b83d3d090863d77946
3
- size 4739951
 
 
 
 
audioldm/audioldm2-music/projection_model/diffusion_pytorch_model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d58536f307aec6d114f8017c0c0e46762a35bce76f418f8ff7cee0c68ebc8f8
3
- size 4737688
 
 
 
 
audioldm/audioldm2-music/scheduler/scheduler_config.json DELETED
@@ -1,19 +0,0 @@
1
- {
2
- "_class_name": "DDIMScheduler",
3
- "_diffusers_version": "0.20.0.dev0",
4
- "beta_end": 0.0195,
5
- "beta_schedule": "scaled_linear",
6
- "beta_start": 0.0015,
7
- "clip_sample": false,
8
- "clip_sample_range": 1.0,
9
- "dynamic_thresholding_ratio": 0.995,
10
- "num_train_timesteps": 1000,
11
- "prediction_type": "epsilon",
12
- "rescale_betas_zero_snr": false,
13
- "sample_max_value": 1.0,
14
- "set_alpha_to_one": false,
15
- "steps_offset": 1,
16
- "thresholding": false,
17
- "timestep_spacing": "leading",
18
- "trained_betas": null
19
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-music/text_encoder/config.json DELETED
@@ -1,35 +0,0 @@
1
- {
2
- "architectures": [
3
- "ClapModel"
4
- ],
5
- "audio_config": {
6
- "depths": [
7
- 2,
8
- 2,
9
- 12,
10
- 2
11
- ],
12
- "fusion_num_hidden_layers": 2,
13
- "hidden_size": 1024,
14
- "model_type": "clap_audio_model",
15
- "patch_embeds_hidden_size": 128,
16
- "projection_hidden_size": 768
17
- },
18
- "hidden_size": 768,
19
- "initializer_factor": 1.0,
20
- "logit_scale_init_value": 14.285714285714285,
21
- "model_type": "clap",
22
- "num_hidden_layers": 16,
23
- "projection_dim": 512,
24
- "projection_hidden_act": "relu",
25
- "text_config": {
26
- "classifier_dropout": null,
27
- "fusion_hidden_size": 768,
28
- "fusion_num_hidden_layers": 2,
29
- "initializer_range": 0.02,
30
- "model_type": "clap_text_model",
31
- "projection_hidden_size": 768
32
- },
33
- "torch_dtype": "float64",
34
- "transformers_version": "4.32.0.dev0"
35
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-music/text_encoder/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4a47b4a637dd58e9edb7b64a06acf37328b7cc3eafb0b8a85df895cc9e45d09
3
- size 776327432
 
 
 
 
audioldm/audioldm2-music/text_encoder/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:637b3ff0f7b212cedafb00739521dc49d8f7953f12bfc1f76ff692f108a41ed0
3
- size 776444665
 
 
 
 
audioldm/audioldm2-music/text_encoder_2/config.json DELETED
@@ -1,32 +0,0 @@
1
- {
2
- "architectures": [
3
- "T5EncoderModel"
4
- ],
5
- "classifier_dropout": 0.0,
6
- "d_ff": 2816,
7
- "d_kv": 64,
8
- "d_model": 1024,
9
- "decoder_start_token_id": 0,
10
- "dense_act_fn": "gelu_new",
11
- "dropout_rate": 0.1,
12
- "eos_token_id": 1,
13
- "feed_forward_proj": "gated-gelu",
14
- "initializer_factor": 1.0,
15
- "is_encoder_decoder": true,
16
- "is_gated_act": true,
17
- "layer_norm_epsilon": 1e-06,
18
- "model_type": "t5",
19
- "n_positions": 512,
20
- "num_decoder_layers": 24,
21
- "num_heads": 16,
22
- "num_layers": 24,
23
- "output_past": true,
24
- "pad_token_id": 0,
25
- "relative_attention_max_distance": 128,
26
- "relative_attention_num_buckets": 32,
27
- "tie_word_embeddings": false,
28
- "torch_dtype": "float32",
29
- "transformers_version": "4.32.0.dev0",
30
- "use_cache": true,
31
- "vocab_size": 32128
32
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audioldm/audioldm2-music/text_encoder_2/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1d0c8f1c739db9343c12ea4b0e3f2c97a833b3c072c251e91d97b7326fefb4e
3
- size 1364951064
 
 
 
 
audioldm/audioldm2-music/text_encoder_2/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c4be8e23954ef72bd0d623206a46b7e1ab7fa23f530b7b9f691d40785273b27
3
- size 1364996921
 
 
 
 
audioldm/audioldm2-music/tokenizer/merges.txt DELETED
The diff for this file is too large to render. See raw diff