Morgan Funtowicz committed on
Commit
ead11a7
·
1 Parent(s): c9543c7

feat(whisper): correctly detect timestamp tokens when decoding segments

Browse files
Files changed (1) hide show
  1. endpoint.py +2 -2
endpoint.py CHANGED
@@ -130,7 +130,7 @@ def process_chunk(
130
  timestamp_offset: int,
131
  ) -> Generator:
132
  # Some constants
133
- k_timestamp_token = lru_cache(tokenizer.convert_tokens_to_ids)("<|0.00|>")
134
 
135
  # Detect start of transcript token
136
  # sot_mask = ids == k_sot_token
@@ -280,7 +280,7 @@ class WhisperHandler(Handler[TranscriptionRequest, TranscriptionResponse]):
280
  # Compute initial prompt for the segment
281
  is_verbose = request.response_kind == TranscriptionResponseKind.VERBOSE_JSON
282
  language = convert_tokens_to_ids(f"<|{request.language}|>")
283
- timestamp = convert_tokens_to_ids(f"<|{timestamp:.2f}|>" if is_verbose else '<|notimestamps|>')
284
  prompt = create_prompt(audio_chunk, WhisperHandler.WHISPER_SAMPLING_RATE, language, timestamp)
285
 
286
  # Submit the task
 
130
  timestamp_offset: int,
131
  ) -> Generator:
132
  # Some constants
133
+ k_timestamp_token = lru_cache(tokenizer.convert_tokens_to_ids)(f"<|0.00|>")
134
 
135
  # Detect start of transcript token
136
  # sot_mask = ids == k_sot_token
 
280
  # Compute initial prompt for the segment
281
  is_verbose = request.response_kind == TranscriptionResponseKind.VERBOSE_JSON
282
  language = convert_tokens_to_ids(f"<|{request.language}|>")
283
+ timestamp = convert_tokens_to_ids(f"<|0.00|>" if is_verbose else '<|notimestamps|>')
284
  prompt = create_prompt(audio_chunk, WhisperHandler.WHISPER_SAMPLING_RATE, language, timestamp)
285
 
286
  # Submit the task