Morgan Funtowicz
committed on
Commit
·
ead11a7
1
Parent(s):
c9543c7
feat(whisper): correctly detect timestamp tokens when decoding segments
Browse files
- endpoint.py +2 -2
endpoint.py
CHANGED
|
@@ -130,7 +130,7 @@ def process_chunk(
|
|
| 130 |
timestamp_offset: int,
|
| 131 |
) -> Generator:
|
| 132 |
# Some constants
|
| 133 |
-
k_timestamp_token = lru_cache(tokenizer.convert_tokens_to_ids)("<|0.00|>")
|
| 134 |
|
| 135 |
# Detect start of transcript token
|
| 136 |
# sot_mask = ids == k_sot_token
|
|
@@ -280,7 +280,7 @@ class WhisperHandler(Handler[TranscriptionRequest, TranscriptionResponse]):
|
|
| 280 |
# Compute initial prompt for the segment
|
| 281 |
is_verbose = request.response_kind == TranscriptionResponseKind.VERBOSE_JSON
|
| 282 |
language = convert_tokens_to_ids(f"<|{request.language}|>")
|
| 283 |
-
timestamp = convert_tokens_to_ids(f"<|
|
| 284 |
prompt = create_prompt(audio_chunk, WhisperHandler.WHISPER_SAMPLING_RATE, language, timestamp)
|
| 285 |
|
| 286 |
# Submit the task
|
|
|
|
| 130 |
timestamp_offset: int,
|
| 131 |
) -> Generator:
|
| 132 |
# Some constants
|
| 133 |
+
k_timestamp_token = lru_cache(tokenizer.convert_tokens_to_ids)(f"<|0.00|>")
|
| 134 |
|
| 135 |
# Detect start of transcript token
|
| 136 |
# sot_mask = ids == k_sot_token
|
|
|
|
| 280 |
# Compute initial prompt for the segment
|
| 281 |
is_verbose = request.response_kind == TranscriptionResponseKind.VERBOSE_JSON
|
| 282 |
language = convert_tokens_to_ids(f"<|{request.language}|>")
|
| 283 |
+
timestamp = convert_tokens_to_ids(f"<|0.00|>" if is_verbose else '<|notimestamps|>')
|
| 284 |
prompt = create_prompt(audio_chunk, WhisperHandler.WHISPER_SAMPLING_RATE, language, timestamp)
|
| 285 |
|
| 286 |
# Submit the task
|