|
import gradio as gr
|
|
from fastapi import FastAPI, Request
|
|
import uvicorn
|
|
from sentence_transformers import SentenceTransformer
|
|
from sentence_transformers.util import cos_sim
|
|
from sentence_transformers.quantization import quantize_embeddings
|
|
|
|
|
|
import spaces
|
|
|
|
|
|
|
|
# FastAPI application: carries the OpenAI-compatible /v1/embeddings route and
# later has the Gradio demo mounted onto it at "/".
app = FastAPI()
|
|
|
|
|
|
@spaces.GPU
def embed(text):
    """Encode *text* with the module-level SentenceTransformer.

    Runs on GPU via the `spaces.GPU` decorator (Hugging Face ZeroGPU).
    Returns the embedding as a plain Python list so it is JSON-serializable.
    `text` may be a single string or a list of strings, in which case a
    list of vectors is returned (SentenceTransformer.encode batches lists).
    """
    vector = Embedder.encode(text)
    return vector.tolist()
|
|
|
|
|
|
|
|
@app.post("/v1/embeddings")
async def openai_embeddings(request: Request):
    """OpenAI-compatible embeddings endpoint.

    Expects a JSON body with ``input`` (a string or, per the OpenAI API,
    a list of strings) and optionally ``model``. Returns the standard
    OpenAI embeddings response shape.
    """
    body = await request.json()

    # .get() so a missing "model" yields an empty echo instead of a 500.
    model = body.get('model', '')
    text = body['input']

    # The OpenAI API allows `input` to be one string or a list of strings;
    # normalize to a list and encode as one batch. encode() on a list
    # returns one vector per entry (standard sentence-transformers
    # behavior — TODO confirm for this model).
    inputs = text if isinstance(text, list) else [text]
    vectors = embed(inputs)

    return {
        'object': "list",
        'data': [
            {
                # Per the OpenAI spec the per-item object is "embedding"
                # (the original said "embeddings").
                'object': "embedding",
                'embedding': vec,
                'index': i,
            }
            for i, vec in enumerate(vectors)
        ],
        'model': model,
        # Token accounting is not implemented; report zeros.
        'usage': {
            'prompt_tokens': 0,
            'total_tokens': 0,
        },
    }
|
|
|
|
def fn(text):
    """Gradio callback: embed the submitted text and show it in the output box.

    The original discarded the result of ``embed`` (implicitly returning
    None), so the output textbox never updated; returning the embedding
    wires it through to the output component.
    """
    return embed(text)
|
|
|
|
# Minimal UI: type text, press Enter, see its embedding.
with gr.Blocks(fill_height=True) as demo:
    query_box = gr.Textbox()
    result_box = gr.Textbox()

    # Enter in the input box runs fn and routes its return value
    # to the output box.
    query_box.submit(fn, [query_box], [result_box])
|
|
|
|
|
|
print("Loading embedding model")

# Module-level model instance shared by embed(); loaded once at import time.
Embedder = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("Demo run...")

# Launch Gradio's background server without blocking this script.
# NOTE: the return values are deliberately NOT unpacked — the original
# `(app, url, other) = demo.launch(...)` rebound `app`, shadowing the
# FastAPI instance that carries /v1/embeddings, so the app served below
# lost the API route.
demo.launch(prevent_thread_lock=True)

print("Mounting app...")

# Mount the Gradio UI at "/" on the FastAPI app so one server exposes both.
GradioApp = gr.mount_gradio_app(app, demo, path="/", ssr_mode=False)
|
|
|
|
if __name__ == '__main__':
    # Fixed typo in the log message ("uviconr" -> "uvicorn").
    print("Running uvicorn...")
    uvicorn.run(GradioApp)
|
|
|
|
|
|
|
|
|