Nelly-43 committed on
Commit
ceeeba7
Β·
verified Β·
1 Parent(s): 71ec02c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -48
app.py CHANGED
@@ -17,54 +17,24 @@ pipe = pipeline(
17
  token=os.getenv('HF_TOKEN'),
18
  )
19
 
 
 
 
20
 
21
# Adapted from the timestamp formatter in openai/whisper (whisper/utils.py).
def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = "."):
    """Render a duration in seconds as a ``[HH:]MM:SS<marker>mmm`` string.

    The hours field is emitted only when non-zero, unless
    ``always_include_hours`` forces it. ``decimal_marker`` separates the
    seconds from the milliseconds (``.`` by default, ``,`` for SRT-style
    output). A ``None`` input is treated as a malformed timestamp and is
    returned unchanged.
    """
    if seconds is None:
        # Malformed timestamp: pass it through as-is rather than crashing.
        return seconds

    # Work in integer milliseconds and peel off each unit with divmod.
    total_ms = round(seconds * 1000.0)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    whole_seconds, millis = divmod(remainder, 1_000)

    hours_part = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return f"{hours_part}{minutes:02d}:{whole_seconds:02d}{decimal_marker}{millis:03d}"
40
-
41
-
42
def transcribe(file, task, return_timestamps):
    """Transcribe (or translate) an audio file with the module-level pipeline.

    Runs ``pipe`` on ``file`` with the requested ``task``. When
    ``return_timestamps`` is truthy, each chunk is rendered on its own line
    as ``[start -> end] text``; otherwise the plain transcript is returned.
    """
    outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
    text = outputs["text"]
    if return_timestamps:
        lines = []
        for chunk in outputs["chunks"]:
            start, end = chunk["timestamp"]
            lines.append(f"[{format_timestamp(start)} -> {format_timestamp(end)}] {chunk['text']}")
        text = "\n".join(lines)
    return text
53
-
54
 
55
  demo = gr.Blocks()
56
-
57
- mic_transcribe = gr.Interface(
58
  fn=transcribe,
59
  inputs=[
60
- gr.inputs.Audio(source="microphone", type="filepath", optional=True),
61
- gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
62
- gr.inputs.Checkbox(default=False, label="Return timestamps"),
63
  ],
64
  outputs="text",
65
- layout="horizontal",
66
  theme="huggingface",
67
- title="Luganda Whisper Demo: Transcribe Audio",
68
  description=(
69
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
70
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and πŸ€— Transformers to transcribe audio files"
@@ -76,22 +46,21 @@ mic_transcribe = gr.Interface(
76
  file_transcribe = gr.Interface(
77
  fn=transcribe,
78
  inputs=[
79
- gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
80
- gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
81
- gr.inputs.Checkbox(default=False, label="Return timestamps"),
82
  ],
83
  outputs="text",
84
- layout="horizontal",
85
  theme="huggingface",
86
- title="Luganda Whisper Demo: Transcribe Audio",
87
  description=(
88
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
89
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and πŸ€— Transformers to transcribe audio files"
90
  " of arbitrary length."
91
  ),
92
  examples=[
93
- ["./ama_log-1514-E30_17.wav", "transcribe", False],
94
- ["./ng_log-1614-E2_364.wav", "transcribe", True],
95
  ],
96
  cache_examples=True,
97
  allow_flagging="never",
@@ -100,4 +69,5 @@ file_transcribe = gr.Interface(
100
  with demo:
101
  gr.TabbedInterface([mic_transcribe, file_transcribe], ["Transcribe Microphone", "Transcribe Audio File"])
102
 
103
- demo.launch(enable_queue=True)
 
 
17
  token=os.getenv('HF_TOKEN'),
18
  )
19
 
20
def transcribe(inputs, task):
    """Transcribe (or translate) an audio file and return the text.

    Raises a ``gr.Error`` when no audio was provided, so the UI shows a
    friendly message instead of a stack trace. Timestamps are always
    requested from the pipeline (required for long-form audio), but only
    the joined text is returned.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    outputs = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
    return outputs["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  demo = gr.Blocks()
28
+ mic_transcribe = gr.Interface(
 
29
  fn=transcribe,
30
  inputs=[
31
+ gr.Audio(sources="microphone", type="filepath"),
32
+ gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
 
33
  ],
34
  outputs="text",
35
+ # layout="horizontal",
36
  theme="huggingface",
37
+ title="Bambara Whisper Demo: Transcribe Audio",
38
  description=(
39
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
40
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and πŸ€— Transformers to transcribe audio files"
 
46
  file_transcribe = gr.Interface(
47
  fn=transcribe,
48
  inputs=[
49
+ gr.Audio(sources="upload", label="Audio file", type="filepath"),
50
+ gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
 
51
  ],
52
  outputs="text",
53
+ # layout="horizontal",
54
  theme="huggingface",
55
+ title="Bambara Whisper Demo: Transcribe Audio",
56
  description=(
57
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
58
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and πŸ€— Transformers to transcribe audio files"
59
  " of arbitrary length."
60
  ),
61
  examples=[
62
+ ["./ama_log-1514-E30_17.wav", "transcribe"],
63
+ ["./ng_log-1614-E2_364.wav", "transcribe"],
64
  ],
65
  cache_examples=True,
66
  allow_flagging="never",
 
69
  with demo:
70
  gr.TabbedInterface([mic_transcribe, file_transcribe], ["Transcribe Microphone", "Transcribe Audio File"])
71
 
72
+ demo.queue(max_size=10)
73
+ demo.launch()