Add ablang.py and encoderblock.py to root directory for Hugging Face compatibility

Files changed:
- ablang.py +181 -0
- encoderblock.py +173 -0
- modeling_ablang2paired.py +10 -20
- test_ablang2_HF_implementation.ipynb +74 -297
- test_module_loading.py +19 -0
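With ablang.py and encoderblock.py at the repo root, transformers' remote-code loader can resolve the model definition directly from the Hub repo instead of requiring a pip-installed ablang2 package. A minimal loading sketch, mirroring the test_module_loading.py added in this commit:

from transformers import AutoModel, AutoTokenizer

# trust_remote_code=True makes transformers fetch the configuration/modeling
# files (and the modules they reference) from the Hub repo itself.
model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)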
ablang.py
ADDED
@@ -0,0 +1,181 @@
+from dataclasses import dataclass
+from typing import Optional, Tuple
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+from .encoderblock import TransformerEncoder, get_activation_fn
+
+
+class AbLang(torch.nn.Module):
+    """
+    AbLang inspired by ESM-2's architecture.
+    """
+
+    def __init__(
+        self,
+        vocab_size,
+        hidden_embed_size,
+        n_attn_heads,
+        n_encoder_blocks,
+        padding_tkn,
+        mask_tkn,
+        layer_norm_eps: float = 1e-12,
+        a_fn: str = "gelu",
+        dropout: float = 0.0,
+    ):
+        super().__init__()
+
+        self.AbRep = AbRep(
+            vocab_size,
+            hidden_embed_size,
+            n_attn_heads,
+            n_encoder_blocks,
+            padding_tkn,
+            mask_tkn,
+            layer_norm_eps,
+            a_fn,
+            dropout,
+        )
+        self.AbHead = AbHead(
+            vocab_size,
+            hidden_embed_size,
+            self.AbRep.aa_embed_layer.weight,
+            layer_norm_eps,
+            a_fn,
+        )
+
+    def forward(self, tokens, return_attn_weights=False, return_rep_layers=[]):
+
+        representations = self.AbRep(tokens, return_attn_weights, return_rep_layers)
+
+        if return_attn_weights:
+            return representations.attention_weights
+        elif return_rep_layers != []:
+            return representations.many_hidden_states
+        else:
+            likelihoods = self.AbHead(representations.last_hidden_states)
+            return likelihoods
+
+    def get_aa_embeddings(self):
+        "Extracts the trained aa_embeddings."
+        return self.AbRep.aa_embed_layer
+
+
+class AbRep(torch.nn.Module):
+    """
+    AbRep (antibody representations) takes the tokenized sequence and creates hidden_embed (representations).
+    """
+
+    def __init__(
+        self,
+        vocab_size,
+        hidden_embed_size,
+        n_attn_heads,
+        n_encoder_blocks,
+        padding_tkn,
+        mask_tkn,
+        layer_norm_eps: float = 1e-12,
+        a_fn: str = "gelu",
+        dropout: float = 0.1,
+    ):
+        super().__init__()
+        self.padding_tkn = padding_tkn
+        self.mask_tkn = mask_tkn
+
+        self.aa_embed_layer = nn.Embedding(
+            vocab_size,
+            hidden_embed_size,
+            padding_idx=padding_tkn,
+        )
+        self.encoder_blocks = nn.ModuleList(
+            [TransformerEncoder(
+                hidden_embed_size,
+                n_attn_heads,
+                attn_dropout = dropout,
+                layer_norm_eps = layer_norm_eps,
+                a_fn = a_fn,
+            ) for _ in range(n_encoder_blocks)]
+        )
+        self.layer_norm_after_encoder_blocks = nn.LayerNorm(hidden_embed_size, eps=layer_norm_eps)
+
+    def forward(self,
+        tokens,
+        return_attn_weights=False,
+        return_rep_layers=[],
+    ):
+
+        assert tokens.ndim == 2
+        padding_mask = tokens.eq(self.padding_tkn)
+
+        hidden_embed = self.aa_embed_layer(tokens)
+
+        return_rep_layers = set(return_rep_layers)
+        rep_layers = {}
+        if 0 in return_rep_layers: rep_layers[0] = hidden_embed
+
+        all_attn_weights = []
+
+        for n_layer, encoder_block in enumerate(self.encoder_blocks):
+            hidden_embed, attn_weights = encoder_block(hidden_embed, padding_mask, return_attn_weights)
+
+            if (n_layer + 1) in return_rep_layers:
+                rep_layers[n_layer + 1] = hidden_embed
+
+            if return_attn_weights:
+                all_attn_weights.append(attn_weights)
+
+        hidden_embed = self.layer_norm_after_encoder_blocks(hidden_embed)
+
+        return DataAbRep(
+            last_hidden_states=hidden_embed,
+            many_hidden_states=rep_layers,
+            attention_weights=all_attn_weights
+        )
+
+
+class AbHead(torch.nn.Module):
+    """
+    AbHead (antibody head model) creates amino acid probabilities for each position based on the hidden_embed (representations).
+    """
+
+    def __init__(
+        self,
+        vocab_size,
+        hidden_embed_size,
+        weights,
+        layer_norm_eps: float = 1e-12,
+        a_fn: str = "gelu",
+    ):
+        super().__init__()
+
+        activation_fn, scale = get_activation_fn(a_fn)
+
+        self.ff = torch.nn.Sequential(
+            nn.Linear(hidden_embed_size, hidden_embed_size * scale),
+            activation_fn(),
+            nn.LayerNorm(hidden_embed_size, eps=layer_norm_eps),
+        )
+
+        self.weights = weights
+        self.bias = nn.Parameter(torch.zeros(vocab_size))
+
+    def forward(self, hidden_embed):
+
+        hidden_embed = self.ff(hidden_embed)
+        logits = F.linear(hidden_embed, self.weights) + self.bias
+
+        return logits
+
+
+@dataclass
+class DataAbRep():
+    """
+    Dataclass used to store AbRep output.
+    """
+
+    last_hidden_states: torch.FloatTensor
+    many_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    attention_weights: Optional[Tuple[torch.FloatTensor]] = None
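For orientation, a minimal usage sketch of the classes above. The hyperparameters here are illustrative only: hidden_embed_size=480 and vocab_size=26 match the output shapes seen in the test notebook later in this commit, while the head/block counts and special-token ids are hypothetical placeholders; the real values come from the checkpoint's AbLang2PairedConfig.

import torch

model = AbLang(
    vocab_size=26,           # matches the (..., 26) probability outputs in the notebook
    hidden_embed_size=480,   # matches the (..., 480) embedding outputs in the notebook
    n_attn_heads=20,         # hypothetical; must divide hidden_embed_size
    n_encoder_blocks=12,     # hypothetical
    padding_tkn=21,          # hypothetical token ids
    mask_tkn=23,
)

tokens = torch.randint(0, 21, (2, 50))         # [batch, seq_len] token ids (no padding)
logits = model(tokens)                         # [2, 50, 26] per-position logits
reps = model.AbRep(tokens).last_hidden_states  # [2, 50, 480] final-layer embeddings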
encoderblock.py
ADDED
@@ -0,0 +1,173 @@
+import torch
+import math
+from torch import nn
+import torch.nn.functional as F
+import einops
+from rotary_embedding_torch import RotaryEmbedding
+
+class TransformerEncoder(torch.nn.Module):
+    """
+    Single Transformer Encoder.
+    """
+    def __init__(
+        self,
+        hidden_embed_size,
+        n_attn_heads,
+        attn_dropout: float = 0.0,
+        layer_norm_eps: float = 1e-05,
+        a_fn: str = "gelu",
+    ):
+        super().__init__()
+
+        assert hidden_embed_size % n_attn_heads == 0, \
+            "Embedding dimension must be divisible by the number of heads."
+
+        self.multihead_attention = MultiHeadAttention(
+            embed_dim = hidden_embed_size,
+            num_heads = n_attn_heads,
+            attention_dropout_prob = attn_dropout
+        )
+
+        activation_fn, scale = get_activation_fn(a_fn)
+
+        self.intermediate_layer = torch.nn.Sequential(
+            torch.nn.Linear(hidden_embed_size, hidden_embed_size * 4 * scale),
+            activation_fn(),
+            torch.nn.Linear(hidden_embed_size * 4, hidden_embed_size),
+        )
+
+        self.pre_attn_layer_norm = torch.nn.LayerNorm(hidden_embed_size, eps=layer_norm_eps)
+        self.final_layer_norm = torch.nn.LayerNorm(hidden_embed_size, eps=layer_norm_eps)
+
+    def forward(self, hidden_embed, attn_mask=None, return_attn_weights: bool = False):
+
+        residual = hidden_embed
+        hidden_embed = self.pre_attn_layer_norm(hidden_embed.clone())
+        hidden_embed, attn_weights = self.multihead_attention(
+            hidden_embed,
+            attn_mask=attn_mask,
+            return_attn_weights=return_attn_weights
+        )
+        hidden_embed = residual + hidden_embed
+
+        residual = hidden_embed
+        hidden_embed = self.final_layer_norm(hidden_embed)
+        hidden_embed = self.intermediate_layer(hidden_embed)
+        hidden_embed = residual + hidden_embed
+        return hidden_embed, attn_weights
+
+class MultiHeadAttention(torch.nn.Module):
+
+    def __init__(
+        self,
+        embed_dim,
+        num_heads,
+        attention_dropout_prob: float = 0.0,
+        bias: bool = True,
+    ):
+        super().__init__()
+
+        self.attention_dropout = torch.nn.Dropout(attention_dropout_prob)
+
+        self.embed_dim = embed_dim
+        self.num_heads = num_heads
+        self.head_dim = embed_dim // num_heads
+        assert (self.head_dim * num_heads == self.embed_dim), "embed_dim must be divisible by num_heads"
+        self.scaling = self.head_dim**-0.5
+
+        self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
+        self.v_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
+        self.q_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
+
+        self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
+
+        self.reset_parameters()
+
+        self.rotary_emb = RotaryEmbedding(dim = self.head_dim)
+
+    def reset_parameters(self):
+
+        nn.init.xavier_uniform_(self.k_proj.weight, gain=1 / math.sqrt(2))
+        nn.init.xavier_uniform_(self.v_proj.weight, gain=1 / math.sqrt(2))
+        nn.init.xavier_uniform_(self.q_proj.weight, gain=1 / math.sqrt(2))
+
+        nn.init.xavier_uniform_(self.out_proj.weight)
+        if self.out_proj.bias is not None:
+            nn.init.constant_(self.out_proj.bias, 0.0)
+
+    def attention(self, q, k, v, attn_mask=None):
+
+        attn_weights = torch.matmul(q, k.transpose(-2, -1))
+        attn_weights = attn_weights / math.sqrt(self.head_dim)
+
+        if attn_mask is not None:
+            attn_mask = einops.rearrange(
+                attn_mask,
+                'b_size (h1 h2 seq_len) -> b_size h1 h2 seq_len',
+                h1=1, h2=1
+            )
+            attn_weights = attn_weights.masked_fill(attn_mask, float("-inf"))
+
+        attn_weights = F.softmax(attn_weights, dim=-1)
+
+        attn = self.attention_dropout(attn_weights)
+        attn = torch.matmul(attn, v)
+        return attn, attn_weights
+
+    def forward(self, x, attn_mask=None, return_attn_weights: bool = False):
+
+        batch_size, seq_len, embed_dim = x.size()
+
+        q, k, v = self.q_proj(x), self.k_proj(x), self.v_proj(x)
+        q *= self.scaling
+
+        q = q.contiguous().view(
+            batch_size,
+            seq_len,
+            self.num_heads,
+            self.head_dim
+        ).transpose(1, 2)  # [n_batch, n_heads, seq_len, head_dim]
+        k = k.contiguous().view(
+            batch_size,
+            seq_len,
+            self.num_heads,
+            self.head_dim
+        ).transpose(1, 2)  # [n_batch, n_heads, seq_len, head_dim]
+        v = v.contiguous().view(
+            batch_size,
+            seq_len,
+            self.num_heads,
+            self.head_dim
+        ).transpose(1, 2)  # [n_batch, n_heads, seq_len, head_dim]
+
+        q = self.rotary_emb.rotate_queries_or_keys(q)
+        k = self.rotary_emb.rotate_queries_or_keys(k)
+
+        # Determine value outputs
+        attn, attn_weights = self.attention(
+            q, k, v,
+            attn_mask=attn_mask
+        )  # attn_weights [n_batch, n_heads, seq_len (target), seq_len (source)]
+
+        attn = attn.transpose(1, 2).reshape(batch_size, seq_len, embed_dim)
+        attn = self.out_proj(attn)
+
+        if return_attn_weights:
+            return attn, attn_weights
+        else:
+            return attn, None
+
+class SwiGLU(torch.nn.Module):
+    def forward(self, x):
+        x, gate = x.chunk(2, dim=-1)
+        return F.silu(gate) * x
+
+def get_activation_fn(a_fn):
+
+    if a_fn == "gelu":
+        return torch.nn.GELU, 1
+
+    elif a_fn == "swiglu":
+        return SwiGLU, 2
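The scale value returned by get_activation_fn exists because SwiGLU halves its input along the last dimension, so the Linear layer feeding it must emit twice the target width. A quick illustrative check of that contract (the width 480 is just an example):

import torch

act_cls, scale = get_activation_fn("swiglu")    # -> (SwiGLU, 2)
hidden = 480                                    # example width
ff_in = torch.randn(2, 10, hidden * 4 * scale)  # what the first Linear emits
ff_out = act_cls()(ff_in)                       # chunk in half, then F.silu(gate) * x
assert ff_out.shape == (2, 10, hidden * 4)      # 4x hidden, as the second Linear expects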
modeling_ablang2paired.py
CHANGED
@@ -9,29 +9,19 @@ try:
 except ImportError:
     from configuration_ablang2paired import AbLang2PairedConfig
 
-# Import the AbLang model from …
-import …
-
-    ablang_path = os.path.join(current_dir, "ablang2", "models", "ablang2", "ablang.py")
-
-    if os.path.exists(ablang_path):
-        spec = importlib.util.spec_from_file_location("ablang", ablang_path)
-        ablang_module = importlib.util.module_from_spec(spec)
-        spec.loader.exec_module(ablang_module)
-        return ablang_module.AbLang
-    else:
-        # If not found, raise an error with helpful message
-        raise ImportError(
-            "Could not find AbLang module. Please ensure … "
-            "in the repository."
-        )
+# Import the AbLang model from local files
+try:
+    from ablang import AbLang
+except ImportError:
+    # Fallback: try to import from the current directory
+    try:
+        from .ablang import AbLang
+    except ImportError:
+        raise ImportError(
+            "Could not find AbLang module. Please ensure ablang.py is present in the repository."
+        )
 
 
 class AbLang2PairedHFModel(PreTrainedModel):
     config_class = AbLang2PairedConfig
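The switch from filesystem-path loading to plain imports matters for trust_remote_code loading: the re-run notebook below captures the failure mode this commit addresses, transformers' import scan (check_imports in dynamic_module_utils) treating absolute imports in a remote-code file as pip requirements and aborting with "This modeling file requires the following packages that were not found in your environment: ablang2. Run `pip install ablang2`". The rewritten block appears to avoid that: imports wrapped in try/except seem to be skipped by the scan, and the relative `from .ablang import AbLang` fallback tells the loader to fetch ablang.py from the same Hub repo, which is why this commit places it at the repo root.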
test_ablang2_HF_implementation.ipynb
CHANGED
@@ -86,34 +86,77 @@
Cell "6d66ad84" (model loading): stale outputs replaced with those of a fresh run.
- Removed stderr: two "A new version of the following files was downloaded from https://huggingface.co/hemantn/ablang2: … Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision." warnings (for configuration_ablang2paired.py and modeling_ablang2paired.py) and a huggingface_hub FutureWarning that `resume_download` is deprecated.
+ Added: download-progress widget outputs for config.json, configuration_ablang2paired.py, and modeling_ablang2paired.py; the remote-code warning for configuration_ablang2paired.py; the stderr line "Encountered exception while importing ablang2: No module named 'ablang2'"; and an ImportError raised by transformers' check_imports during AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True): "This modeling file requires the following packages that were not found in your environment: ablang2. Run `pip install ablang2`".

@@ -162,7 +205,7 @@
Cell "ceae4a88-0679-4704-8bad-c06a4569c497": "execution_count" reset to null.

@@ -187,30 +230,10 @@
Cell "d22f4302-1262-4cc1-8a1c-a36daa8c710c" (source: ablang(all_seqs, mode='seqcoding')): "execution_count" reset to null; removed the cached execute_result, a (5, 480) sequence-embedding array.

@@ -231,85 +254,10 @@
Cell "6227f661-575f-4b1e-9646-cfba7b10c3b4" (source: ablang(all_seqs, mode='rescoding', stepwise_masking = False)): "execution_count" reset to null; removed the cached execute_result, a list of five per-residue float32 arrays of shapes (238, 480), (222, 480), (124, 480), (115, 480), and (238, 480).

@@ -330,80 +278,10 @@
Cell "e4bc0cb1-f5b0-4255-9e93-d643ae1396df" (source: results = ablang(only_both_chains_seqs, mode='likelihood', align=True)): "execution_count" reset to null; removed the cached stdout, which held the aligned numbering labels (e.g. '1 ' … '112A' … '128 '), the aligned heavy|light sequences, and the per-position likelihood arrays.

@@ -414,60 +292,10 @@
Cell "56be8cad" (source: ablang(only_both_chains_seqs, mode='probability')): "execution_count" reset to null; removed the cached execute_result, three per-position probability float32 arrays of shapes (238, 26), (222, 26), and (238, 26).

@@ -492,21 +320,10 @@
Cell "83f3064b-48a7-42fb-ba82-ec153ea946da" (source: results = ablang(all_seqs, mode='pseudo_log_likelihood'); np.exp(-results)): "execution_count" reset to null; removed the cached execute_result, the pseudo-perplexities array([1.96673731, 2.04801253, 2.09881898, 1.82533665, 1.97255249]).

@@ -514,22 +331,10 @@
Cell "42cc8b34-5ae9-4857-93fe-a438a0f2a868" (source: results = ablang(all_seqs, mode='confidence'); np.exp(-results)): "execution_count" reset to null; removed the cached execute_result, array([1.2636038, 1.126463, 1.3123759, 1.2140924, 1.1805094], dtype=float32).

@@ -547,24 +352,10 @@
Cell "2d5b725c-4eac-4a4b-9331-357c3ac140f7" (source: restored = ablang(only_both_chains_seqs, mode='restore')): "execution_count" reset to null; removed the cached execute_result, three restored paired heavy|light sequences (dtype '<U238').

@@ -572,24 +363,10 @@
Cell "0e9615f7-c490-4947-96f4-7617266c686e" (source: restored = ablang(only_both_chains_seqs, mode='restore', align = True)): "execution_count" reset to null; removed the cached execute_result, three restored, aligned paired sequences (dtype '<U238').
test_module_loading.py
ADDED
@@ -0,0 +1,19 @@
+import sys
+import os
+import numpy as np
+from transformers import AutoModel, AutoTokenizer
+from transformers.utils import cached_file
+
+# Load model and tokenizer from Hugging Face Hub
+model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)
+
+# Find the cached model directory and import adapter
+adapter_path = cached_file("hemantn/ablang2", "adapter.py")
+cached_model_dir = os.path.dirname(adapter_path)
+sys.path.insert(0, cached_model_dir)
+
+# Import and create the adapter
+from adapter import AbLang2PairedHuggingFaceAdapter
+ablang = AbLang2PairedHuggingFaceAdapter(model=model, tokenizer=tokenizer)
+
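Once the adapter is constructed it exposes the same call interface the notebook exercises; all_seqs and only_both_chains_seqs below stand for the sequence lists defined in the notebook (not shown in this diff):

import numpy as np

# Modes exercised in test_ablang2_HF_implementation.ipynb:
ablang(all_seqs, mode='seqcoding')                          # per-sequence embeddings, (n_seqs, 480)
ablang(all_seqs, mode='rescoding', stepwise_masking=False)  # per-residue embeddings
ablang(only_both_chains_seqs, mode='probability')           # per-position probabilities, (seq_len, 26)
results = ablang(all_seqs, mode='pseudo_log_likelihood')
np.exp(-results)                                            # convert to pseudo perplexity
ablang(only_both_chains_seqs, mode='restore', align=True)   # restore masked/missing residues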