rotary-embedding-torch module added to requirements file; updated README to access cached directory
Browse files- README.md +15 -9
- requirements.txt +1 -1
- test_ablang2_HF_implementation.ipynb +43 -56
README.md
CHANGED
@@ -41,9 +41,15 @@ This repository provides HuggingFace-compatible π€ implementation of the AbLan
|
|
41 |
Install the required dependencies:
|
42 |
|
43 |
```bash
|
44 |
-
|
|
|
|
|
|
|
|
|
45 |
```
|
46 |
|
|
|
|
|
47 |
## π Loading Model from Hugging Face Hub
|
48 |
|
49 |
### Method 1: Load Model and Tokenizer, then Import Adapter
|
@@ -51,14 +57,14 @@ pip install transformers torch numpy pandas anarci
|
|
51 |
import sys
|
52 |
import os
|
53 |
from transformers import AutoModel, AutoTokenizer
|
54 |
-
from
|
55 |
|
56 |
# Load model and tokenizer from Hugging Face Hub
|
57 |
model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
58 |
tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
59 |
|
60 |
-
#
|
61 |
-
adapter_path =
|
62 |
cached_model_dir = os.path.dirname(adapter_path)
|
63 |
sys.path.insert(0, cached_model_dir)
|
64 |
|
@@ -71,14 +77,14 @@ ablang = AbLang2PairedHuggingFaceAdapter(model=model, tokenizer=tokenizer)
|
|
71 |
```python
|
72 |
import importlib.util
|
73 |
from transformers import AutoModel, AutoTokenizer
|
74 |
-
from
|
75 |
|
76 |
# Load model and tokenizer
|
77 |
model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
78 |
tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
79 |
|
80 |
# Load adapter dynamically
|
81 |
-
adapter_path =
|
82 |
spec = importlib.util.spec_from_file_location("adapter", adapter_path)
|
83 |
adapter_module = importlib.util.module_from_spec(spec)
|
84 |
spec.loader.exec_module(adapter_module)
|
@@ -116,14 +122,14 @@ The `AbLang2PairedHuggingFaceAdapter` class is a wrapper that lets you use AbLan
|
|
116 |
import sys
|
117 |
import os
|
118 |
from transformers import AutoModel, AutoTokenizer
|
119 |
-
from
|
120 |
|
121 |
# 1. Load model and tokenizer from Hugging Face Hub
|
122 |
model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
123 |
tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
124 |
|
125 |
-
# 2.
|
126 |
-
adapter_path =
|
127 |
cached_model_dir = os.path.dirname(adapter_path)
|
128 |
sys.path.insert(0, cached_model_dir)
|
129 |
from adapter import AbLang2PairedHuggingFaceAdapter
|
|
|
41 |
Install the required dependencies:
|
42 |
|
43 |
```bash
|
44 |
+
# Install core dependencies
|
45 |
+
pip install transformers numpy pandas rotary-embedding-torch
|
46 |
+
|
47 |
+
# Install ANARCI from bioconda (required for antibody numbering)
|
48 |
+
conda install -c bioconda anarci
|
49 |
```
|
50 |
|
51 |
+
**Note**: ANARCI is required for antibody sequence numbering and alignment features. It must be installed from the bioconda channel.
|
52 |
+
|
53 |
## π Loading Model from Hugging Face Hub
|
54 |
|
55 |
### Method 1: Load Model and Tokenizer, then Import Adapter
|
|
|
57 |
import sys
|
58 |
import os
|
59 |
from transformers import AutoModel, AutoTokenizer
|
60 |
+
from huggingface_hub import hf_hub_download
|
61 |
|
62 |
# Load model and tokenizer from Hugging Face Hub
|
63 |
model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
64 |
tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
65 |
|
66 |
+
# Download adapter and add to path
|
67 |
+
adapter_path = hf_hub_download(repo_id="hemantn/ablang2", filename="adapter.py")
|
68 |
cached_model_dir = os.path.dirname(adapter_path)
|
69 |
sys.path.insert(0, cached_model_dir)
|
70 |
|
|
|
77 |
```python
|
78 |
import importlib.util
|
79 |
from transformers import AutoModel, AutoTokenizer
|
80 |
+
from huggingface_hub import hf_hub_download
|
81 |
|
82 |
# Load model and tokenizer
|
83 |
model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
84 |
tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
85 |
|
86 |
# Load adapter dynamically
|
87 |
+
adapter_path = hf_hub_download(repo_id="hemantn/ablang2", filename="adapter.py")
|
88 |
spec = importlib.util.spec_from_file_location("adapter", adapter_path)
|
89 |
adapter_module = importlib.util.module_from_spec(spec)
|
90 |
spec.loader.exec_module(adapter_module)
|
|
|
122 |
import sys
|
123 |
import os
|
124 |
from transformers import AutoModel, AutoTokenizer
|
125 |
+
from huggingface_hub import hf_hub_download
|
126 |
|
127 |
# 1. Load model and tokenizer from Hugging Face Hub
|
128 |
model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
129 |
tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
130 |
|
131 |
+
# 2. Download adapter and add to path
|
132 |
+
adapter_path = hf_hub_download(repo_id="hemantn/ablang2", filename="adapter.py")
|
133 |
cached_model_dir = os.path.dirname(adapter_path)
|
134 |
sys.path.insert(0, cached_model_dir)
|
135 |
from adapter import AbLang2PairedHuggingFaceAdapter
|
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
gradio>=4.0.0
|
2 |
transformers>=4.30.0
|
3 |
-
torch>=2.0.0
|
4 |
numpy>=1.21.0
|
5 |
pandas>=1.3.0
|
|
|
6 |
git+https://github.com/oxpig/ANARCI.git
|
|
|
1 |
gradio>=4.0.0
|
2 |
transformers>=4.30.0
|
|
|
3 |
numpy>=1.21.0
|
4 |
pandas>=1.3.0
|
5 |
+
rotary-embedding-torch>=0.2.0
|
6 |
git+https://github.com/oxpig/ANARCI.git
|
test_ablang2_HF_implementation.ipynb
CHANGED
@@ -29,7 +29,7 @@
|
|
29 |
"import os\n",
|
30 |
"import numpy as np\n",
|
31 |
"from transformers import AutoModel, AutoTokenizer\n",
|
32 |
-
"from
|
33 |
]
|
34 |
},
|
35 |
{
|
@@ -99,12 +99,12 @@
|
|
99 |
{
|
100 |
"data": {
|
101 |
"application/vnd.jupyter.widget-view+json": {
|
102 |
-
"model_id": "
|
103 |
"version_major": 2,
|
104 |
"version_minor": 0
|
105 |
},
|
106 |
"text/plain": [
|
107 |
-
"config.json: 0%| | 0.00/
|
108 |
]
|
109 |
},
|
110 |
"metadata": {},
|
@@ -113,7 +113,7 @@
|
|
113 |
{
|
114 |
"data": {
|
115 |
"application/vnd.jupyter.widget-view+json": {
|
116 |
-
"model_id": "
|
117 |
"version_major": 2,
|
118 |
"version_minor": 0
|
119 |
},
|
@@ -136,7 +136,7 @@
|
|
136 |
{
|
137 |
"data": {
|
138 |
"application/vnd.jupyter.widget-view+json": {
|
139 |
-
"model_id": "
|
140 |
"version_major": 2,
|
141 |
"version_minor": 0
|
142 |
},
|
@@ -161,7 +161,7 @@
|
|
161 |
{
|
162 |
"data": {
|
163 |
"application/vnd.jupyter.widget-view+json": {
|
164 |
-
"model_id": "
|
165 |
"version_major": 2,
|
166 |
"version_minor": 0
|
167 |
},
|
@@ -176,13 +176,13 @@
|
|
176 |
"name": "stdout",
|
177 |
"output_type": "stream",
|
178 |
"text": [
|
179 |
-
"β
Loaded custom weights from: /home/hn533621/.cache/huggingface/hub/models--hemantn--ablang2/snapshots/
|
180 |
]
|
181 |
},
|
182 |
{
|
183 |
"data": {
|
184 |
"application/vnd.jupyter.widget-view+json": {
|
185 |
-
"model_id": "
|
186 |
"version_major": 2,
|
187 |
"version_minor": 0
|
188 |
},
|
@@ -205,7 +205,7 @@
|
|
205 |
{
|
206 |
"data": {
|
207 |
"application/vnd.jupyter.widget-view+json": {
|
208 |
-
"model_id": "
|
209 |
"version_major": 2,
|
210 |
"version_minor": 0
|
211 |
},
|
@@ -219,7 +219,7 @@
|
|
219 |
{
|
220 |
"data": {
|
221 |
"application/vnd.jupyter.widget-view+json": {
|
222 |
-
"model_id": "
|
223 |
"version_major": 2,
|
224 |
"version_minor": 0
|
225 |
},
|
@@ -234,25 +234,12 @@
|
|
234 |
"name": "stdout",
|
235 |
"output_type": "stream",
|
236 |
"text": [
|
237 |
-
"
|
238 |
-
"β
Copied ablang_encodings.py to current directory\n",
|
239 |
-
"β
Copied alignment.py to current directory\n",
|
240 |
-
"β
Copied scores.py to current directory\n",
|
241 |
-
"β
Copied extra_utils.py to current directory\n",
|
242 |
-
"β
Copied ablang.py to current directory\n",
|
243 |
-
"β
Copied encoderblock.py to current directory\n",
|
244 |
-
"π Files in current directory (/home/hn533621/.cache/huggingface/hub/models--hemantn--ablang2/snapshots/88ddbb1fcb11297c3fd439b83500743e6adc25c0):\n",
|
245 |
-
" ablang.py\n",
|
246 |
-
" scores.py\n",
|
247 |
-
" extra_utils.py\n",
|
248 |
-
" ablang_encodings.py\n",
|
249 |
-
" encoderblock.py\n",
|
250 |
-
" modeling_ablang2paired.py\n",
|
251 |
" adapter.py\n",
|
|
|
252 |
" tokenizer_ablang2paired.py\n",
|
253 |
-
"
|
254 |
-
"
|
255 |
-
" configuration_ablang2paired.py\n"
|
256 |
]
|
257 |
}
|
258 |
],
|
@@ -262,7 +249,7 @@
|
|
262 |
"tokenizer = AutoTokenizer.from_pretrained(\"hemantn/ablang2\", trust_remote_code=True)\n",
|
263 |
"\n",
|
264 |
"# Find the cached model directory and import adapter\n",
|
265 |
-
"adapter_path =
|
266 |
"cached_model_dir = os.path.dirname(adapter_path)\n",
|
267 |
"sys.path.insert(0, cached_model_dir)\n",
|
268 |
"\n",
|
@@ -333,16 +320,16 @@
|
|
333 |
{
|
334 |
"data": {
|
335 |
"text/plain": [
|
336 |
-
"array([[-0.
|
337 |
-
" -0.
|
338 |
-
" [-0.
|
339 |
-
" -0.
|
340 |
-
" [-0.
|
341 |
-
" -0.
|
342 |
-
" [-0.
|
343 |
-
" -0.
|
344 |
-
" [-0.
|
345 |
-
" -0.
|
346 |
]
|
347 |
},
|
348 |
"execution_count": 6,
|
@@ -397,12 +384,12 @@
|
|
397 |
" [-0.06784609, 0.69349885, -0.4212398 , ..., -0.24805346,\n",
|
398 |
" -0.39583805, -0.10972726],\n",
|
399 |
" ...,\n",
|
400 |
-
" [-0.
|
401 |
-
" -0.
|
402 |
-
" [
|
403 |
-
" -0.
|
404 |
-
" [-0.
|
405 |
-
" 0.
|
406 |
" array([[-0.40043733, -0.48596814, 0.0886725 , ..., 0.38941646,\n",
|
407 |
" 0.06195956, -0.40999672],\n",
|
408 |
" [-0.54576075, 0.4312959 , -0.3451486 , ..., -0.09285564,\n",
|
@@ -410,12 +397,12 @@
|
|
410 |
" [ 0.0221165 , 0.53196615, -0.30137214, ..., -0.1889072 ,\n",
|
411 |
" -0.32587305, 0.05078396],\n",
|
412 |
" ...,\n",
|
413 |
-
" [
|
414 |
-
"
|
415 |
-
" [-0.
|
416 |
-
"
|
417 |
-
" [-0.
|
418 |
-
" 0.
|
419 |
" array([[-0.26863217, 0.32259187, 0.10813517, ..., 0.03953876,\n",
|
420 |
" 0.18312076, -0.00498045],\n",
|
421 |
" [-0.2165424 , -0.38562432, -0.02696264, ..., 0.20541488,\n",
|
@@ -423,12 +410,12 @@
|
|
423 |
" [-0.41950518, 0.04743317, 0.0048816 , ..., 0.11408642,\n",
|
424 |
" -0.05384652, 0.1025871 ],\n",
|
425 |
" ...,\n",
|
426 |
-
" [-0.
|
427 |
-
" -0.
|
428 |
-
" [ 0.
|
429 |
-
" -0.
|
430 |
-
" [-0.
|
431 |
-
"
|
432 |
" array([[-0.42062947, -0.44009134, 0.00152371, ..., 0.27141467,\n",
|
433 |
" 0.03798106, -0.397461 ],\n",
|
434 |
" [-0.57318133, 0.5258899 , -0.17001636, ..., -0.23864633,\n",
|
|
|
29 |
"import os\n",
|
30 |
"import numpy as np\n",
|
31 |
"from transformers import AutoModel, AutoTokenizer\n",
|
32 |
+
"from huggingface_hub import hf_hub_download"
|
33 |
]
|
34 |
},
|
35 |
{
|
|
|
99 |
{
|
100 |
"data": {
|
101 |
"application/vnd.jupyter.widget-view+json": {
|
102 |
+
"model_id": "ed2d5574bd21463c9244070ab762c31e",
|
103 |
"version_major": 2,
|
104 |
"version_minor": 0
|
105 |
},
|
106 |
"text/plain": [
|
107 |
+
"config.json: 0%| | 0.00/763 [00:00<?, ?B/s]"
|
108 |
]
|
109 |
},
|
110 |
"metadata": {},
|
|
|
113 |
{
|
114 |
"data": {
|
115 |
"application/vnd.jupyter.widget-view+json": {
|
116 |
+
"model_id": "10e1a02037f74d2da6e0860ef914829b",
|
117 |
"version_major": 2,
|
118 |
"version_minor": 0
|
119 |
},
|
|
|
136 |
{
|
137 |
"data": {
|
138 |
"application/vnd.jupyter.widget-view+json": {
|
139 |
+
"model_id": "eaf036440107433f950cf4b8c652d756",
|
140 |
"version_major": 2,
|
141 |
"version_minor": 0
|
142 |
},
|
|
|
161 |
{
|
162 |
"data": {
|
163 |
"application/vnd.jupyter.widget-view+json": {
|
164 |
+
"model_id": "22b9a58a3100420c9e353415e7194af6",
|
165 |
"version_major": 2,
|
166 |
"version_minor": 0
|
167 |
},
|
|
|
176 |
"name": "stdout",
|
177 |
"output_type": "stream",
|
178 |
"text": [
|
179 |
+
"β
Loaded custom weights from: /home/hn533621/.cache/huggingface/hub/models--hemantn--ablang2/snapshots/13d4401549c368256c517dc13b8ed3d8b28d5e87/model.pt\n"
|
180 |
]
|
181 |
},
|
182 |
{
|
183 |
"data": {
|
184 |
"application/vnd.jupyter.widget-view+json": {
|
185 |
+
"model_id": "e1c40183f9104aa1a67bf9b1c3daea0c",
|
186 |
"version_major": 2,
|
187 |
"version_minor": 0
|
188 |
},
|
|
|
205 |
{
|
206 |
"data": {
|
207 |
"application/vnd.jupyter.widget-view+json": {
|
208 |
+
"model_id": "3fadab1179e2438ba88e08efb7819680",
|
209 |
"version_major": 2,
|
210 |
"version_minor": 0
|
211 |
},
|
|
|
219 |
{
|
220 |
"data": {
|
221 |
"application/vnd.jupyter.widget-view+json": {
|
222 |
+
"model_id": "5673cfaa95ac4da78e627c36ad6191b0",
|
223 |
"version_major": 2,
|
224 |
"version_minor": 0
|
225 |
},
|
|
|
234 |
"name": "stdout",
|
235 |
"output_type": "stream",
|
236 |
"text": [
|
237 |
+
"π Files in current directory (/home/hn533621/.cache/huggingface/hub/models--hemantn--ablang2/snapshots/13d4401549c368256c517dc13b8ed3d8b28d5e87):\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
" adapter.py\n",
|
239 |
+
" configuration_ablang2paired.py\n",
|
240 |
" tokenizer_ablang2paired.py\n",
|
241 |
+
" modeling_ablang2paired.py\n",
|
242 |
+
"β
Successfully imported utility modules from cache directory\n"
|
|
|
243 |
]
|
244 |
}
|
245 |
],
|
|
|
249 |
"tokenizer = AutoTokenizer.from_pretrained(\"hemantn/ablang2\", trust_remote_code=True)\n",
|
250 |
"\n",
|
251 |
"# Find the cached model directory and import adapter\n",
|
252 |
+
"adapter_path = hf_hub_download(repo_id=\"hemantn/ablang2\", filename=\"adapter.py\")\n",
|
253 |
"cached_model_dir = os.path.dirname(adapter_path)\n",
|
254 |
"sys.path.insert(0, cached_model_dir)\n",
|
255 |
"\n",
|
|
|
320 |
{
|
321 |
"data": {
|
322 |
"text/plain": [
|
323 |
+
"array([[-0.2520631 , 0.18189636, 0.00887137, ..., 0.15365516,\n",
|
324 |
+
" -0.14508602, -0.13381316],\n",
|
325 |
+
" [-0.24383117, 0.20946886, 0.07412891, ..., 0.15079288,\n",
|
326 |
+
" -0.13847049, -0.07304662],\n",
|
327 |
+
" [-0.20084268, 0.23405147, -0.00103735, ..., 0.07450922,\n",
|
328 |
+
" -0.08084311, -0.21812904],\n",
|
329 |
+
" [-0.12659703, 0.3051279 , -0.15117611, ..., -0.20749238,\n",
|
330 |
+
" -0.10453435, -0.0787883 ],\n",
|
331 |
+
" [-0.2955319 , 0.17239201, 0.05676926, ..., 0.15943624,\n",
|
332 |
+
" -0.16615382, -0.15569784]], shape=(5, 480), dtype=float32)"
|
333 |
]
|
334 |
},
|
335 |
"execution_count": 6,
|
|
|
384 |
" [-0.06784609, 0.69349885, -0.4212398 , ..., -0.24805346,\n",
|
385 |
" -0.39583805, -0.10972726],\n",
|
386 |
" ...,\n",
|
387 |
+
" [-0.02212614, 0.26338235, -0.5558968 , ..., -0.24067189,\n",
|
388 |
+
" -0.11965694, 0.07879876],\n",
|
389 |
+
" [-0.20650092, 0.43451664, -0.09650223, ..., -0.05296766,\n",
|
390 |
+
" -0.04297376, 0.41854134],\n",
|
391 |
+
" [-0.02653179, 0.03729444, 0.13194172, ..., -0.4554279 ,\n",
|
392 |
+
" 0.03723941, 0.17769177]], shape=(238, 480), dtype=float32),\n",
|
393 |
" array([[-0.40043733, -0.48596814, 0.0886725 , ..., 0.38941646,\n",
|
394 |
" 0.06195956, -0.40999672],\n",
|
395 |
" [-0.54576075, 0.4312959 , -0.3451486 , ..., -0.09285564,\n",
|
|
|
397 |
" [ 0.0221165 , 0.53196615, -0.30137214, ..., -0.1889072 ,\n",
|
398 |
" -0.32587305, 0.05078396],\n",
|
399 |
" ...,\n",
|
400 |
+
" [-0.03700298, 0.7739084 , 0.3454928 , ..., -0.03060072,\n",
|
401 |
+
" 0.02420983, -0.48005292],\n",
|
402 |
+
" [-0.03366657, 0.74771184, -0.35423476, ..., -0.08759108,\n",
|
403 |
+
" -0.17898935, -0.4540483 ],\n",
|
404 |
+
" [-0.16625853, 0.2701079 , -0.19761363, ..., 0.10313392,\n",
|
405 |
+
" 0.44890267, -0.64840287]], shape=(238, 480), dtype=float32),\n",
|
406 |
" array([[-0.26863217, 0.32259187, 0.10813517, ..., 0.03953876,\n",
|
407 |
" 0.18312076, -0.00498045],\n",
|
408 |
" [-0.2165424 , -0.38562432, -0.02696264, ..., 0.20541488,\n",
|
|
|
410 |
" [-0.41950518, 0.04743317, 0.0048816 , ..., 0.11408642,\n",
|
411 |
" -0.05384652, 0.1025871 ],\n",
|
412 |
" ...,\n",
|
413 |
+
" [-0.14095458, 0.5860325 , -0.44657114, ..., -0.39150292,\n",
|
414 |
+
" -0.22395667, -0.42516366],\n",
|
415 |
+
" [ 0.29816052, 0.40440455, -0.52062094, ..., 0.08969188,\n",
|
416 |
+
" -0.20792632, -0.2045222 ],\n",
|
417 |
+
" [-0.21370608, 0.23035707, -0.355185 , ..., -0.36726946,\n",
|
418 |
+
" -0.05693531, -0.37847823]], shape=(238, 480), dtype=float32),\n",
|
419 |
" array([[-0.42062947, -0.44009134, 0.00152371, ..., 0.27141467,\n",
|
420 |
" 0.03798106, -0.397461 ],\n",
|
421 |
" [-0.57318133, 0.5258899 , -0.17001636, ..., -0.23864633,\n",
|