from transformers import pipeline
from pydub import AudioSegment
import torch
from starlette.applications import Starlette
from starlette.responses import JSONResponse
from starlette.routing import Route
import asyncio

# Default test clip; renamed so it no longer shadows the built-in `input`.
AUDIO_PATH = "layout_detection_3min.flac"

# One-time preprocessing: trim the first 3 minutes of the m4a recording and
# export it as FLAC. Uncomment to regenerate the test clip.
# audio = AudioSegment.from_file("./data/Layout Detection.m4a", "m4a")
# offset_ms = 180 * 1000  # pydub slices in milliseconds
# audio = audio[:offset_ms]
# audio.export(AUDIO_PATH, format="flac")

# Pin inference to GPU 2 when CUDA is available; otherwise fall back to CPU.
device = "cuda:2" if torch.cuda.is_available() else "cpu"

# Last transcription result, exposed at /show for inspection.
res = None


async def homepage(request):
    """POST /: the raw body is the path of an audio file readable by the server."""
    global res
    payload = await request.body()
    audio_path = payload.decode("utf-8")
    # Per-request reply channel: the model loop puts the result back here.
    response_q = asyncio.Queue()
    await request.app.model_queue.put((audio_path, response_q))
    output = await response_q.get()
    res = output
    return JSONResponse(output)


async def show(request):
    """GET /show: return the most recent transcription result."""
    print(f"request {request}")
    return JSONResponse(res)


async def server_loop(q):
    """Single consumer of the model queue; serializes access to one pipeline."""
    pipe = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-large",
        chunk_length_s=30,  # chunk long audio to fit Whisper's 30 s window
        device=device,
    )
    # Force Chinese transcription instead of letting Whisper auto-detect.
    pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
        language="zh", task="transcribe"
    )
    while True:
        audio_path, response_q = await q.get()
        print(f"input {audio_path}")
        # Run the blocking model call in a worker thread so the event loop
        # stays responsive while transcription is in progress.
        out = await asyncio.to_thread(pipe, audio_path)
        await response_q.put(out)


app = Starlette(
    routes=[
        Route("/", homepage, methods=["POST"]),
        Route("/show", show, methods=["GET"]),
    ],
)


@app.on_event("startup")
async def startup_event():
    # Shared job queue plus a background task that owns the model.
    q = asyncio.Queue()
    app.model_queue = q
    asyncio.create_task(server_loop(q))
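
# ---------------------------------------------------------------------------
# Usage sketch. Assumptions not stated in the script above: the server runs
# under uvicorn on localhost:8000, the module is named server.py, and the
# client has `requests` installed.
#
# Start the server:
#     uvicorn server:app --port 8000
#
# Then, from a separate client process:
#
#     import requests
#
#     SERVER = "http://localhost:8000"  # assumed host/port
#
#     # POST the raw path of an audio file readable by the *server* process;
#     # the handler decodes the body and queues it for Whisper.
#     resp = requests.post(f"{SERVER}/", data="layout_detection_3min.flac")
#     print(resp.json())
#
#     # Re-fetch the most recent transcription result.
#     print(requests.get(f"{SERVER}/show").json())
#
# Design note: server_loop is the sole consumer of model_queue, so GPU
# inference is serialized even under concurrent requests; each request
# carries its own asyncio.Queue as a one-shot reply channel, letting many
# awaiting handlers share a single loaded model.
# ---------------------------------------------------------------------------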