Text Generation
Transformers
PyTorch
Safetensors
English
gpt_neox
causal-lm
pythia
text-generation-inference
Files changed (1)
  1. EleutherAI_pythia-12b.json +144 -0
EleutherAI_pythia-12b.json ADDED
@@ -0,0 +1,144 @@
+ {
+   "bomFormat": "CycloneDX",
+   "specVersion": "1.6",
+   "serialNumber": "urn:uuid:e33947f8-aae7-4995-adeb-d36d728818aa",
+   "version": 1,
+   "metadata": {
+     "timestamp": "2025-10-07T08:19:30.409870+00:00",
+     "component": {
+       "type": "machine-learning-model",
+       "bom-ref": "EleutherAI/pythia-12b-e54a9a31-ff47-5eb1-8c49-d06c73636dc7",
+       "licenses": [
+         {
+           "license": {
+             "id": "Apache-2.0",
+             "url": "https://spdx.org/licenses/Apache-2.0.html"
+           }
+         }
+       ],
+       "externalReferences": [
+         {
+           "url": "https://huggingface.co/EleutherAI/pythia-12b",
+           "type": "documentation"
+         }
+       ],
+       "modelCard": {
+         "modelParameters": {
+           "datasets": [
+             {
+               "ref": "EleutherAI/pile-671dd665-c5ab-5ddc-8ee4-6f2955b81787"
+             }
+           ],
+           "task": "text-generation",
+           "architectureFamily": "gpt_neox",
+           "modelArchitecture": "GPTNeoXForCausalLM"
+         },
+         "properties": [
+           {
+             "name": "library_name",
+             "value": "transformers"
+           }
+         ],
+         "consideration": {
+           "useCases": "## Uses and Limitations\n"
+         }
+       },
+       "name": "EleutherAI/pythia-12b",
+       "authors": [
+         {
+           "name": "EleutherAI"
+         }
+       ],
+       "description": "## Model Details\n\n- Developed by: [EleutherAI](http://eleuther.ai)\n- Model type: Transformer-based Language Model\n- Language: English\n- Learn more: [Pythia's GitHub repository](https://github.com/EleutherAI/pythia)\nfor training procedure, config files, and details on how to use.\n[See paper](https://arxiv.org/pdf/2304.01373.pdf) for more evals and implementation\ndetails.\n- Library: [GPT-NeoX](https://github.com/EleutherAI/gpt-neox)\n- License: Apache 2.0\n- Contact: to ask questions about this model, join the [EleutherAI\nDiscord](https://discord.gg/zBGx3azzUn), and post them in `#release-discussion`.\nPlease read the existing *Pythia* documentation before asking about it in the\nEleutherAI Discord. For general correspondence: [contact@eleuther.\nai](mailto:[email protected]).\n\n<figure>\n\n| Pythia model | Non-Embedding Params | Layers | Model Dim | Heads | Batch Size | Learning Rate | Equivalent Models |\n| -----------: | -------------------: | :----: | :-------: | :---: | :--------: | :-------------------: | :--------------------: |\n| 70M | 18,915,328 | 6 | 512 | 8 | 2M | 1.0 x 10<sup>-3</sup> | \u2014 |\n| 160M | 85,056,000 | 12 | 768 | 12 | 2M | 6.0 x 10<sup>-4</sup> | GPT-Neo 125M, OPT-125M |\n| 410M | 302,311,424 | 24 | 1024 | 16 | 2M | 3.0 x 10<sup>-4</sup> | OPT-350M |\n| 1.0B | 805,736,448 | 16 | 2048 | 8 | 2M | 3.0 x 10<sup>-4</sup> | \u2014 |\n| 1.4B | 1,208,602,624 | 24 | 2048 | 16 | 2M | 2.0 x 10<sup>-4</sup> | GPT-Neo 1.3B, OPT-1.3B |\n| 2.8B | 2,517,652,480 | 32 | 2560 | 32 | 2M | 1.6 x 10<sup>-4</sup> | GPT-Neo 2.7B, OPT-2.7B |\n| 6.9B | 6,444,163,072 | 32 | 4096 | 32 | 2M | 1.2 x 10<sup>-4</sup> | OPT-6.7B |\n| 12B | 11,327,027,200 | 36 | 5120 | 40 | 2M | 1.2 x 10<sup>-4</sup> | \u2014 |\n<figcaption>Engineering details for the <i>Pythia Suite</i>. Deduped and\nnon-deduped models of a given size have the same hyperparameters. \u201cEquivalent\u201d\nmodels have <b>exactly</b> the same architecture, and the same number of\nnon-embedding parameters.</figcaption>\n</figure>\n",
+       "tags": [
+         "transformers",
+         "pytorch",
+         "safetensors",
+         "gpt_neox",
+         "text-generation",
+         "causal-lm",
+         "pythia",
+         "en",
+         "dataset:EleutherAI/pile",
+         "arxiv:2304.01373",
+         "arxiv:2101.00027",
+         "arxiv:2201.07311",
+         "license:apache-2.0",
+         "autotrain_compatible",
+         "text-generation-inference",
+         "endpoints_compatible",
+         "region:us"
+       ]
+     }
+   },
+   "components": [
+     {
+       "type": "data",
+       "bom-ref": "EleutherAI/pile-671dd665-c5ab-5ddc-8ee4-6f2955b81787",
+       "name": "EleutherAI/pile",
+       "data": [
+         {
+           "type": "dataset",
+           "bom-ref": "EleutherAI/pile-671dd665-c5ab-5ddc-8ee4-6f2955b81787",
+           "name": "EleutherAI/pile",
+           "contents": {
+             "url": "https://huggingface.co/datasets/EleutherAI/pile",
+             "properties": [
+               {
+                 "name": "task_categories",
+                 "value": "text-generation, fill-mask"
+               },
+               {
+                 "name": "task_ids",
+                 "value": "language-modeling, masked-language-modeling"
+               },
+               {
+                 "name": "language",
+                 "value": "en"
+               },
+               {
+                 "name": "size_categories",
+                 "value": "100B<n<1T"
+               },
+               {
+                 "name": "annotations_creators",
+                 "value": "no-annotation"
+               },
+               {
+                 "name": "language_creators",
+                 "value": "found"
+               },
+               {
+                 "name": "pretty_name",
+                 "value": "the Pile"
+               },
+               {
+                 "name": "source_datasets",
+                 "value": "original"
+               },
+               {
+                 "name": "paperswithcode_id",
+                 "value": "the-pile"
+               },
+               {
+                 "name": "license",
+                 "value": "other"
+               }
+             ]
+           },
+           "description": "The Pile is a 825 GiB diverse, open source language modelling data set that consists of 22 smaller, high-quality\ndatasets combined together.",
+           "governance": {
+             "owners": [
+               {
+                 "organization": {
+                   "name": "EleutherAI",
+                   "url": "https://huggingface.co/EleutherAI"
+                 }
+               }
+             ]
+           }
+         }
+       ]
+     }
+   ]
+ }
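
For reference, the BOM above is plain CycloneDX 1.6 JSON, so it can be inspected with nothing beyond the standard library. The sketch below is illustrative and not part of this PR; it assumes the added file is saved locally as `EleutherAI_pythia-12b.json`, reads the model component from `metadata.component`, and resolves the dataset `ref` under `modelCard.modelParameters.datasets` against the top-level `components` list via its `bom-ref`.

```python
import json

# Illustrative only: filename assumed to match the file added in this PR.
with open("EleutherAI_pythia-12b.json", encoding="utf-8") as f:
    bom = json.load(f)

model = bom["metadata"]["component"]
params = model["modelCard"]["modelParameters"]

print("model:       ", model["name"])
print("architecture:", params["modelArchitecture"])
print("license:     ", model["licenses"][0]["license"]["id"])

# Index top-level components by bom-ref so dataset refs can be resolved.
by_ref = {c["bom-ref"]: c for c in bom.get("components", [])}

for ds in params.get("datasets", []):
    component = by_ref.get(ds["ref"])
    if component is not None:
        contents = component["data"][0]["contents"]
        print("dataset:     ", component["name"], "->", contents["url"])
```

Run against the document above, this would print the `EleutherAI/pythia-12b` component with architecture `GPTNeoXForCausalLM` and license `Apache-2.0`, followed by the `EleutherAI/pile` dataset and its Hugging Face URL.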