rotary-embedding-torch module added to requirements file; updated README to access cached directory
Browse files- README.md +15 -9
- requirements.txt +1 -1
- test_ablang2_HF_implementation.ipynb +43 -56
README.md
CHANGED
@@ -41,9 +41,15 @@ This repository provides HuggingFace-compatible π€ implementation of the AbLan
|
|
41 |
Install the required dependencies:
|
42 |
|
43 |
```bash
|
44 |
-
|
|
|
|
|
|
|
|
|
45 |
```
|
46 |
|
|
|
|
|
47 |
## π Loading Model from Hugging Face Hub
|
48 |
|
49 |
### Method 1: Load Model and Tokenizer, then Import Adapter
|
@@ -51,14 +57,14 @@ pip install transformers torch numpy pandas anarci
|
|
51 |
import sys
|
52 |
import os
|
53 |
from transformers import AutoModel, AutoTokenizer
|
54 |
-
from
|
55 |
|
56 |
# Load model and tokenizer from Hugging Face Hub
|
57 |
model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
58 |
tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
59 |
|
60 |
-
#
|
61 |
-
adapter_path =
|
62 |
cached_model_dir = os.path.dirname(adapter_path)
|
63 |
sys.path.insert(0, cached_model_dir)
|
64 |
|
@@ -71,14 +77,14 @@ ablang = AbLang2PairedHuggingFaceAdapter(model=model, tokenizer=tokenizer)
|
|
71 |
```python
|
72 |
import importlib.util
|
73 |
from transformers import AutoModel, AutoTokenizer
|
74 |
-
from
|
75 |
|
76 |
# Load model and tokenizer
|
77 |
model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
78 |
tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
79 |
|
80 |
# Load adapter dynamically
|
81 |
-
adapter_path =
|
82 |
spec = importlib.util.spec_from_file_location("adapter", adapter_path)
|
83 |
adapter_module = importlib.util.module_from_spec(spec)
|
84 |
spec.loader.exec_module(adapter_module)
|
@@ -116,14 +122,14 @@ The `AbLang2PairedHuggingFaceAdapter` class is a wrapper that lets you use AbLan
|
|
116 |
import sys
|
117 |
import os
|
118 |
from transformers import AutoModel, AutoTokenizer
|
119 |
-
from
|
120 |
|
121 |
# 1. Load model and tokenizer from Hugging Face Hub
|
122 |
model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
123 |
tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
124 |
|
125 |
-
# 2.
|
126 |
-
adapter_path =
|
127 |
cached_model_dir = os.path.dirname(adapter_path)
|
128 |
sys.path.insert(0, cached_model_dir)
|
129 |
from adapter import AbLang2PairedHuggingFaceAdapter
|
|
|
41 |
Install the required dependencies:
|
42 |
|
43 |
```bash
|
44 |
+
# Install core dependencies
|
45 |
+
pip install transformers numpy pandas rotary-embedding-torch
|
46 |
+
|
47 |
+
# Install ANARCI from bioconda (required for antibody numbering)
|
48 |
+
conda install -c bioconda anarci
|
49 |
```
|
50 |
|
51 |
+
**Note**: ANARCI is required for antibody sequence numbering and alignment features. It must be installed from the bioconda channel.
|
52 |
+
|
53 |
## π Loading Model from Hugging Face Hub
|
54 |
|
55 |
### Method 1: Load Model and Tokenizer, then Import Adapter
|
|
|
57 |
import sys
|
58 |
import os
|
59 |
from transformers import AutoModel, AutoTokenizer
|
60 |
+
from huggingface_hub import hf_hub_download
|
61 |
|
62 |
# Load model and tokenizer from Hugging Face Hub
|
63 |
model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
64 |
tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
65 |
|
66 |
+
# Download adapter and add to path
|
67 |
+
adapter_path = hf_hub_download(repo_id="hemantn/ablang2", filename="adapter.py")
|
68 |
cached_model_dir = os.path.dirname(adapter_path)
|
69 |
sys.path.insert(0, cached_model_dir)
|
70 |
|
|
|
77 |
```python
|
78 |
import importlib.util
|
79 |
from transformers import AutoModel, AutoTokenizer
|
80 |
+
from huggingface_hub import hf_hub_download
|
81 |
|
82 |
# Load model and tokenizer
|
83 |
model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
84 |
tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
85 |
|
86 |
# Load adapter dynamically
|
87 |
+
adapter_path = hf_hub_download(repo_id="hemantn/ablang2", filename="adapter.py")
|
88 |
spec = importlib.util.spec_from_file_location("adapter", adapter_path)
|
89 |
adapter_module = importlib.util.module_from_spec(spec)
|
90 |
spec.loader.exec_module(adapter_module)
|
|
|
122 |
import sys
|
123 |
import os
|
124 |
from transformers import AutoModel, AutoTokenizer
|
125 |
+
from huggingface_hub import hf_hub_download
|
126 |
|
127 |
# 1. Load model and tokenizer from Hugging Face Hub
|
128 |
model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
129 |
tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)
|
130 |
|
131 |
+
# 2. Download adapter and add to path
|
132 |
+
adapter_path = hf_hub_download(repo_id="hemantn/ablang2", filename="adapter.py")
|
133 |
cached_model_dir = os.path.dirname(adapter_path)
|
134 |
sys.path.insert(0, cached_model_dir)
|
135 |
from adapter import AbLang2PairedHuggingFaceAdapter
|
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
gradio>=4.0.0
|
2 |
transformers>=4.30.0
|
3 |
-
torch>=2.0.0
|
4 |
numpy>=1.21.0
|
5 |
pandas>=1.3.0
|
|
|
6 |
git+https://github.com/oxpig/ANARCI.git
|
|
|
1 |
gradio>=4.0.0
|
2 |
transformers>=4.30.0
|
|
|
3 |
numpy>=1.21.0
|
4 |
pandas>=1.3.0
|
5 |
+
rotary-embedding-torch>=0.2.0
|
6 |
git+https://github.com/oxpig/ANARCI.git
|
test_ablang2_HF_implementation.ipynb
CHANGED
@@ -29,7 +29,7 @@
|
|
29 |
"import os\n",
|
30 |
"import numpy as np\n",
|
31 |
"from transformers import AutoModel, AutoTokenizer\n",
|
32 |
-
"from
|
33 |
]
|
34 |
},
|
35 |
{
|
@@ -99,12 +99,12 @@
|
|
99 |
{
|
100 |
"data": {
|
101 |
"application/vnd.jupyter.widget-view+json": {
|
102 |
-
"model_id": "
|
103 |
"version_major": 2,
|
104 |
"version_minor": 0
|
105 |
},
|
106 |
"text/plain": [
|
107 |
-
"config.json: 0%| | 0.00/
|
108 |
]
|
109 |
},
|
110 |
"metadata": {},
|
@@ -113,7 +113,7 @@
|
|
113 |
{
|
114 |
"data": {
|
115 |
"application/vnd.jupyter.widget-view+json": {
|
116 |
-
"model_id": "
|
117 |
"version_major": 2,
|
118 |
"version_minor": 0
|
119 |
},
|
@@ -136,7 +136,7 @@
|
|
136 |
{
|
137 |
"data": {
|
138 |
"application/vnd.jupyter.widget-view+json": {
|
139 |
-
"model_id": "
|
140 |
"version_major": 2,
|
141 |
"version_minor": 0
|
142 |
},
|
@@ -161,7 +161,7 @@
|
|
161 |
{
|
162 |
"data": {
|
163 |
"application/vnd.jupyter.widget-view+json": {
|
164 |
-
"model_id": "
|
165 |
"version_major": 2,
|
166 |
"version_minor": 0
|
167 |
},
|
@@ -176,13 +176,13 @@
|
|
176 |
"name": "stdout",
|
177 |
"output_type": "stream",
|
178 |
"text": [
|
179 |
-
"β
Loaded custom weights from: /home/hn533621/.cache/huggingface/hub/models--hemantn--ablang2/snapshots/
|
180 |
]
|
181 |
},
|
182 |
{
|
183 |
"data": {
|
184 |
"application/vnd.jupyter.widget-view+json": {
|
185 |
-
"model_id": "
|
186 |
"version_major": 2,
|
187 |
"version_minor": 0
|
188 |
},
|
@@ -205,7 +205,7 @@
|
|
205 |
{
|
206 |
"data": {
|
207 |
"application/vnd.jupyter.widget-view+json": {
|
208 |
-
"model_id": "
|
209 |
"version_major": 2,
|
210 |
"version_minor": 0
|
211 |
},
|
@@ -219,7 +219,7 @@
|
|
219 |
{
|
220 |
"data": {
|
221 |
"application/vnd.jupyter.widget-view+json": {
|
222 |
-
"model_id": "
|
223 |
"version_major": 2,
|
224 |
"version_minor": 0
|
225 |
},
|
@@ -234,25 +234,12 @@
|
|
234 |
"name": "stdout",
|
235 |
"output_type": "stream",
|
236 |
"text": [
|
237 |
-
"
|
238 |
-
"β
Copied ablang_encodings.py to current directory\n",
|
239 |
-
"β
Copied alignment.py to current directory\n",
|
240 |
-
"β
Copied scores.py to current directory\n",
|
241 |
-
"β
Copied extra_utils.py to current directory\n",
|
242 |
-
"β
Copied ablang.py to current directory\n",
|
243 |
-
"β
Copied encoderblock.py to current directory\n",
|
244 |
-
"π Files in current directory (/home/hn533621/.cache/huggingface/hub/models--hemantn--ablang2/snapshots/88ddbb1fcb11297c3fd439b83500743e6adc25c0):\n",
|
245 |
-
" ablang.py\n",
|
246 |
-
" scores.py\n",
|
247 |
-
" extra_utils.py\n",
|
248 |
-
" ablang_encodings.py\n",
|
249 |
-
" encoderblock.py\n",
|
250 |
-
" modeling_ablang2paired.py\n",
|
251 |
" adapter.py\n",
|
|
|
252 |
" tokenizer_ablang2paired.py\n",
|
253 |
-
"
|
254 |
-
"
|
255 |
-
" configuration_ablang2paired.py\n"
|
256 |
]
|
257 |
}
|
258 |
],
|
@@ -262,7 +249,7 @@
|
|
262 |
"tokenizer = AutoTokenizer.from_pretrained(\"hemantn/ablang2\", trust_remote_code=True)\n",
|
263 |
"\n",
|
264 |
"# Find the cached model directory and import adapter\n",
|
265 |
-
"adapter_path =
|
266 |
"cached_model_dir = os.path.dirname(adapter_path)\n",
|
267 |
"sys.path.insert(0, cached_model_dir)\n",
|
268 |
"\n",
|
@@ -333,16 +320,16 @@
|
|
333 |
{
|
334 |
"data": {
|
335 |
"text/plain": [
|
336 |
-
"array([[-0.
|
337 |
-
" -0.
|
338 |
-
" [-0.
|
339 |
-
" -0.
|
340 |
-
" [-0.
|
341 |
-
" -0.
|
342 |
-
" [-0.
|
343 |
-
" -0.
|
344 |
-
" [-0.
|
345 |
-
" -0.
|
346 |
]
|
347 |
},
|
348 |
"execution_count": 6,
|
@@ -397,12 +384,12 @@
|
|
397 |
" [-0.06784609, 0.69349885, -0.4212398 , ..., -0.24805346,\n",
|
398 |
" -0.39583805, -0.10972726],\n",
|
399 |
" ...,\n",
|
400 |
-
" [-0.
|
401 |
-
" -0.
|
402 |
-
" [
|
403 |
-
" -0.
|
404 |
-
" [-0.
|
405 |
-
" 0.
|
406 |
" array([[-0.40043733, -0.48596814, 0.0886725 , ..., 0.38941646,\n",
|
407 |
" 0.06195956, -0.40999672],\n",
|
408 |
" [-0.54576075, 0.4312959 , -0.3451486 , ..., -0.09285564,\n",
|
@@ -410,12 +397,12 @@
|
|
410 |
" [ 0.0221165 , 0.53196615, -0.30137214, ..., -0.1889072 ,\n",
|
411 |
" -0.32587305, 0.05078396],\n",
|
412 |
" ...,\n",
|
413 |
-
" [
|
414 |
-
"
|
415 |
-
" [-0.
|
416 |
-
"
|
417 |
-
" [-0.
|
418 |
-
" 0.
|
419 |
" array([[-0.26863217, 0.32259187, 0.10813517, ..., 0.03953876,\n",
|
420 |
" 0.18312076, -0.00498045],\n",
|
421 |
" [-0.2165424 , -0.38562432, -0.02696264, ..., 0.20541488,\n",
|
@@ -423,12 +410,12 @@
|
|
423 |
" [-0.41950518, 0.04743317, 0.0048816 , ..., 0.11408642,\n",
|
424 |
" -0.05384652, 0.1025871 ],\n",
|
425 |
" ...,\n",
|
426 |
-
" [-0.
|
427 |
-
" -0.
|
428 |
-
" [ 0.
|
429 |
-
" -0.
|
430 |
-
" [-0.
|
431 |
-
"
|
432 |
" array([[-0.42062947, -0.44009134, 0.00152371, ..., 0.27141467,\n",
|
433 |
" 0.03798106, -0.397461 ],\n",
|
434 |
" [-0.57318133, 0.5258899 , -0.17001636, ..., -0.23864633,\n",
|
|
|
29 |
"import os\n",
|
30 |
"import numpy as np\n",
|
31 |
"from transformers import AutoModel, AutoTokenizer\n",
|
32 |
+
"from huggingface_hub import hf_hub_download"
|
33 |
]
|
34 |
},
|
35 |
{
|
|
|
99 |
{
|
100 |
"data": {
|
101 |
"application/vnd.jupyter.widget-view+json": {
|
102 |
+
"model_id": "ed2d5574bd21463c9244070ab762c31e",
|
103 |
"version_major": 2,
|
104 |
"version_minor": 0
|
105 |
},
|
106 |
"text/plain": [
|
107 |
+
"config.json: 0%| | 0.00/763 [00:00<?, ?B/s]"
|
108 |
]
|
109 |
},
|
110 |
"metadata": {},
|
|
|
113 |
{
|
114 |
"data": {
|
115 |
"application/vnd.jupyter.widget-view+json": {
|
116 |
+
"model_id": "10e1a02037f74d2da6e0860ef914829b",
|
117 |
"version_major": 2,
|
118 |
"version_minor": 0
|
119 |
},
|
|
|
136 |
{
|
137 |
"data": {
|
138 |
"application/vnd.jupyter.widget-view+json": {
|
139 |
+
"model_id": "eaf036440107433f950cf4b8c652d756",
|
140 |
"version_major": 2,
|
141 |
"version_minor": 0
|
142 |
},
|
|
|
161 |
{
|
162 |
"data": {
|
163 |
"application/vnd.jupyter.widget-view+json": {
|
164 |
+
"model_id": "22b9a58a3100420c9e353415e7194af6",
|
165 |
"version_major": 2,
|
166 |
"version_minor": 0
|
167 |
},
|
|
|
176 |
"name": "stdout",
|
177 |
"output_type": "stream",
|
178 |
"text": [
|
179 |
+
"β
Loaded custom weights from: /home/hn533621/.cache/huggingface/hub/models--hemantn--ablang2/snapshots/13d4401549c368256c517dc13b8ed3d8b28d5e87/model.pt\n"
|
180 |
]
|
181 |
},
|
182 |
{
|
183 |
"data": {
|
184 |
"application/vnd.jupyter.widget-view+json": {
|
185 |
+
"model_id": "e1c40183f9104aa1a67bf9b1c3daea0c",
|
186 |
"version_major": 2,
|
187 |
"version_minor": 0
|
188 |
},
|
|
|
205 |
{
|
206 |
"data": {
|
207 |
"application/vnd.jupyter.widget-view+json": {
|
208 |
+
"model_id": "3fadab1179e2438ba88e08efb7819680",
|
209 |
"version_major": 2,
|
210 |
"version_minor": 0
|
211 |
},
|
|
|
219 |
{
|
220 |
"data": {
|
221 |
"application/vnd.jupyter.widget-view+json": {
|
222 |
+
"model_id": "5673cfaa95ac4da78e627c36ad6191b0",
|
223 |
"version_major": 2,
|
224 |
"version_minor": 0
|
225 |
},
|
|
|
234 |
"name": "stdout",
|
235 |
"output_type": "stream",
|
236 |
"text": [
|
237 |
+
"π Files in current directory (/home/hn533621/.cache/huggingface/hub/models--hemantn--ablang2/snapshots/13d4401549c368256c517dc13b8ed3d8b28d5e87):\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
" adapter.py\n",
|
239 |
+
" configuration_ablang2paired.py\n",
|
240 |
" tokenizer_ablang2paired.py\n",
|
241 |
+
" modeling_ablang2paired.py\n",
|
242 |
+
"β
Successfully imported utility modules from cache directory\n"
|
|
|
243 |
]
|
244 |
}
|
245 |
],
|
|
|
249 |
"tokenizer = AutoTokenizer.from_pretrained(\"hemantn/ablang2\", trust_remote_code=True)\n",
|
250 |
"\n",
|
251 |
"# Find the cached model directory and import adapter\n",
|
252 |
+
"adapter_path = hf_hub_download(repo_id=\"hemantn/ablang2\", filename=\"adapter.py\")\n",
|
253 |
"cached_model_dir = os.path.dirname(adapter_path)\n",
|
254 |
"sys.path.insert(0, cached_model_dir)\n",
|
255 |
"\n",
|
|
|
320 |
{
|
321 |
"data": {
|
322 |
"text/plain": [
|
323 |
+
"array([[-0.2520631 , 0.18189636, 0.00887137, ..., 0.15365516,\n",
|
324 |
+
" -0.14508602, -0.13381316],\n",
|
325 |
+
" [-0.24383117, 0.20946886, 0.07412891, ..., 0.15079288,\n",
|
326 |
+
" -0.13847049, -0.07304662],\n",
|
327 |
+
" [-0.20084268, 0.23405147, -0.00103735, ..., 0.07450922,\n",
|
328 |
+
" -0.08084311, -0.21812904],\n",
|
329 |
+
" [-0.12659703, 0.3051279 , -0.15117611, ..., -0.20749238,\n",
|
330 |
+
" -0.10453435, -0.0787883 ],\n",
|
331 |
+
" [-0.2955319 , 0.17239201, 0.05676926, ..., 0.15943624,\n",
|
332 |
+
" -0.16615382, -0.15569784]], shape=(5, 480), dtype=float32)"
|
333 |
]
|
334 |
},
|
335 |
"execution_count": 6,
|
|
|
384 |
" [-0.06784609, 0.69349885, -0.4212398 , ..., -0.24805346,\n",
|
385 |
" -0.39583805, -0.10972726],\n",
|
386 |
" ...,\n",
|
387 |
+
" [-0.02212614, 0.26338235, -0.5558968 , ..., -0.24067189,\n",
|
388 |
+
" -0.11965694, 0.07879876],\n",
|
389 |
+
" [-0.20650092, 0.43451664, -0.09650223, ..., -0.05296766,\n",
|
390 |
+
" -0.04297376, 0.41854134],\n",
|
391 |
+
" [-0.02653179, 0.03729444, 0.13194172, ..., -0.4554279 ,\n",
|
392 |
+
" 0.03723941, 0.17769177]], shape=(238, 480), dtype=float32),\n",
|
393 |
" array([[-0.40043733, -0.48596814, 0.0886725 , ..., 0.38941646,\n",
|
394 |
" 0.06195956, -0.40999672],\n",
|
395 |
" [-0.54576075, 0.4312959 , -0.3451486 , ..., -0.09285564,\n",
|
|
|
397 |
" [ 0.0221165 , 0.53196615, -0.30137214, ..., -0.1889072 ,\n",
|
398 |
" -0.32587305, 0.05078396],\n",
|
399 |
" ...,\n",
|
400 |
+
" [-0.03700298, 0.7739084 , 0.3454928 , ..., -0.03060072,\n",
|
401 |
+
" 0.02420983, -0.48005292],\n",
|
402 |
+
" [-0.03366657, 0.74771184, -0.35423476, ..., -0.08759108,\n",
|
403 |
+
" -0.17898935, -0.4540483 ],\n",
|
404 |
+
" [-0.16625853, 0.2701079 , -0.19761363, ..., 0.10313392,\n",
|
405 |
+
" 0.44890267, -0.64840287]], shape=(238, 480), dtype=float32),\n",
|
406 |
" array([[-0.26863217, 0.32259187, 0.10813517, ..., 0.03953876,\n",
|
407 |
" 0.18312076, -0.00498045],\n",
|
408 |
" [-0.2165424 , -0.38562432, -0.02696264, ..., 0.20541488,\n",
|
|
|
410 |
" [-0.41950518, 0.04743317, 0.0048816 , ..., 0.11408642,\n",
|
411 |
" -0.05384652, 0.1025871 ],\n",
|
412 |
" ...,\n",
|
413 |
+
" [-0.14095458, 0.5860325 , -0.44657114, ..., -0.39150292,\n",
|
414 |
+
" -0.22395667, -0.42516366],\n",
|
415 |
+
" [ 0.29816052, 0.40440455, -0.52062094, ..., 0.08969188,\n",
|
416 |
+
" -0.20792632, -0.2045222 ],\n",
|
417 |
+
" [-0.21370608, 0.23035707, -0.355185 , ..., -0.36726946,\n",
|
418 |
+
" -0.05693531, -0.37847823]], shape=(238, 480), dtype=float32),\n",
|
419 |
" array([[-0.42062947, -0.44009134, 0.00152371, ..., 0.27141467,\n",
|
420 |
" 0.03798106, -0.397461 ],\n",
|
421 |
" [-0.57318133, 0.5258899 , -0.17001636, ..., -0.23864633,\n",
|