anji29 committed on
Commit
7a4200c
·
verified ·
1 Parent(s): e2b575f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -0
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModel
3
+ import torch
4
+
5
# Public checkpoint shared by the tokenizer and the encoder; both are loaded
# once at import time and reused by every request.
model_name = "intfloat/multilingual-e5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
9
+
10
+ # Inference function
11
def get_embedding(text):
    """Return the L2-normalised embedding of ``text`` as a list of floats.

    The text is prefixed with ``"query: "``, tokenized with the module-level
    ``tokenizer``, run through the module-level ``model`` without gradient
    tracking, mean-pooled over the attended tokens and scaled to unit length.

    Args:
        text: The string to embed. ``None`` is treated as an empty string.

    Returns:
        A flat list of floats: the embedding of the single input.
    """
    if text is None:
        text = ""

    # E5 models expect: "query: your text here"
    encoded_input = tokenizer(
        "query: " + text,
        # Truncate to the length used in the model card's example; without
        # this, very long inputs overflow the encoder's position embeddings.
        max_length=512,
        truncation=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        model_output = model(**encoded_input)

    # The E5 model card pools by attention-masked averaging over all tokens
    # rather than by taking the CLS token.
    hidden = model_output.last_hidden_state
    mask = encoded_input["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    # clamp guards against a zero token count (division by zero).
    embeddings = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)

    normed = torch.nn.functional.normalize(embeddings, p=2, dim=1)
    return normed[0].tolist()  # single input -> first (only) row as a list
20
+
21
# Gradio UI: a single text box in, the embedding shown in a text box out.
_text_input = gr.Textbox(label="Enter text")
_embedding_output = gr.Textbox(label="Embedding")

iface = gr.Interface(
    fn=get_embedding,
    inputs=_text_input,
    outputs=_embedding_output,
    title="Text Embedder",
)

iface.launch()