"""Gradio + FastAPI sentence-embedding service.

Mounts a small Gradio UI at "/" on a FastAPI app: text submitted in the
input Textbox is encoded by the global ``Embedder`` model and the resulting
vector is shown in the output Textbox.  The SentenceTransformer model is
currently disabled (``Embedder = None``) — re-enable the commented imports
and constructor below to restore real embeddings.
"""
import gradio as gr
from fastapi import FastAPI, Request
import uvicorn
# from sentence_transformers import SentenceTransformer
# from sentence_transformers.util import cos_sim
# from sentence_transformers.quantization import quantize_embeddings
import spaces

app = FastAPI()


@spaces.GPU
def embed(text):
    """Encode *text* with the global ``Embedder`` and return a plain list.

    Raises:
        RuntimeError: if the embedding model has not been loaded
            (``Embedder`` is still ``None``).  The original code crashed
            here with an opaque ``AttributeError`` instead.
    """
    if Embedder is None:
        raise RuntimeError(
            "Embedding model is not loaded (Embedder is None); "
            "enable the SentenceTransformer construction below."
        )
    return Embedder.encode(text).tolist()


# Disabled OpenAI-compatible REST endpoint; kept for later re-enablement.
# @app.post("/v1/embeddings")
# async def openai_embeddings(request: Request):
#     body = await request.json()
#     model = body['model']
#     text = body['input']
#     embeddings = embed(text)
#     return {
#         'object': "list",
#         'data': [{
#             'object': "embeddings",
#             'embedding': embeddings,
#             'index': 0,
#         }],
#         'model': model,
#         'usage': {
#             'prompt_tokens': 0,
#             'total_tokens': 0,
#         },
#     }


def fn(text):
    """Gradio submit callback: return the embedding for display.

    BUG FIX: the original discarded ``embed``'s return value, so the
    output Textbox wired up below was never updated.
    """
    return embed(text)


with gr.Blocks(fill_height=True) as demo:
    text = gr.Textbox()
    embeddings = gr.Textbox()
    # On Enter in `text`, run fn and show the result in `embeddings`.
    text.submit(fn, [text], [embeddings])

print("Loading embedding model")
# TODO: re-enable the real model once sentence_transformers is installed.
Embedder = None  # SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")

# Mount the Gradio UI onto the FastAPI app so both share one ASGI server.
GradioApp = gr.mount_gradio_app(app, demo, path="/", ssr_mode=False)

# demo.launch(
#     share=False,
#     debug=False,
#     server_port=7860,
#     server_name="0.0.0.0",
#     allowed_paths=[]
# )

print("Demo run...")
# NOTE(review): demo.launch() starts Gradio's own blocking server, so the
# uvicorn.run() below is only reached after that server exits.  If the
# intent is to serve exclusively through the mounted FastAPI app, this
# launch() call should be removed — confirm which deployment is intended.
demo.launch()

print("Running uvicorn...")
if __name__ == '__main__':
    uvicorn.run(GradioApp)