import torch
from transformers import AutoTokenizer

from DLM_emb_model import MolEmbDLM

# Checkpoint directory containing both the tokenizer and the model weights.
MODEL_DIR = "/data2/tianang/projects/mdlm/huggingface/huggingface_model"

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = MolEmbDLM.from_pretrained(MODEL_DIR)
model.eval()

# Run on the first GPU when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# A SELFIES string for ethanol ([C][C][O] -> CCO). Inserting a space between
# the bracketed tokens lets the tokenizer split them into individual symbols.
seq = "[C][C][O]"
batch = tokenizer(
    seq.replace('][', '] ['),
    padding=False,
    truncation=False,
    return_tensors="pt",
)
print(batch)

# Move the encoded inputs to the same device as the model.
batch = batch.to(device)

# Forward pass without gradient tracking to obtain the embeddings.
with torch.no_grad():
    embeddings = model(
        input_ids=batch["input_ids"],
        attention_mask=batch["attention_mask"],
    )

print(embeddings.shape)
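
# Embedding several molecules at once, a minimal sketch: this assumes the model
# accepts padded batches and that the checkpoint's tokenizer defines a pad token
# (if it does not, set one first, e.g. tokenizer.pad_token = tokenizer.eos_token).
# The second SELFIES string here is only an illustrative input.
seqs = ["[C][C][O]", "[C][C][C][O]"]
batch = tokenizer(
    [s.replace('][', '] [') for s in seqs],
    padding=True,
    truncation=False,
    return_tensors="pt",
).to(device)

with torch.no_grad():
    embeddings = model(
        input_ids=batch["input_ids"],
        attention_mask=batch["attention_mask"],
    )
print(embeddings.shape)  # one embedding per input sequence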