Harpyia/src/recognizer/strategies/fast_whisper_strategy.py
Sviatoslav Tsariov 37adf74745 Implemented strategies for sos and number recognition
Deleted message sender and promt service

Implemented fast whisper, but it is not working

WavStack refactored into QueueStack, which can use different strategies for processing
2024-03-22 18:59:42 +03:00

59 lines
1.5 KiB
Python

import sys
import whisper
from faster_whisper import WhisperModel
import config
from recognizer.strategies import BaseRecognizerStrategy
class FastWhisperStrategy(BaseRecognizerStrategy):
    """Recognizer strategy that transcribes audio with a faster-whisper model."""

    def __init__(self) -> None:
        """Build the ``WhisperModel`` from the settings in ``config``."""
        self._model = WhisperModel(
            model_size_or_path=config.HARPYIA_MODEL,
            device=config.DEVICE,
            num_workers=config.WHISPER_NUM_WORKERS,
            cpu_threads=config.WHISPER_CPU_THREADS,
        )

    def recognize(self, file, language, prompt) -> dict:
        """Transcribe an audio file and return the recognition result.

        Args:
            file: Object exposing a ``name`` attribute holding the path of
                the audio file to load.
            language: Language passed through to the model's ``transcribe``.
            prompt: Text passed to the model as ``initial_prompt``.

        Returns:
            A dict with the joined transcription under ``'text'`` and a
            placeholder mapping under ``'segments'``.
        """
        audio = self._prepare_file(file.name)
        return self._transcribe(audio, language, prompt)

    def _prepare_file(self, filename: str):
        """Load ``filename`` at the configured sample rate and pad/trim it."""
        audio = whisper.load_audio(filename, sr=config.HARPYIA_SAMPLE_RATE)
        # NOTE(review): pad_or_trim is called with its default length, which
        # presumably fixes the clip to Whisper's standard window -- confirm
        # that longer recordings are not expected here.
        return whisper.pad_or_trim(audio)

    def _transcribe(self, audio, language, prompt):
        """Run the model on ``audio`` and collect the recognized text.

        Every segment is also dumped to stderr for debugging.

        Returns:
            A dict with ``'text'`` (segment texts joined by single spaces)
            and ``'segments'`` (a mapping of per-segment field names, all
            set to ``None``).
        """
        segments, _ = self._model.transcribe(
            audio,
            language=language,
            initial_prompt=prompt,
            condition_on_previous_text=False,
            vad_filter=True,
            beam_size=config.WHISPER_BEAM_SIZE,
        )
        # faster-whisper hands back a lazy, one-shot generator. Materialize
        # it once so that the debug dump below does not consume the results
        # before the text is assembled (which previously left 'text' empty).
        segments = list(segments)

        print('Segments:', file=sys.stderr)
        for segment in segments:
            print(segment, file=sys.stderr)

        return {
            'text': ' '.join(segment.text for segment in segments),
            # Per-segment details are not populated; the keys are kept as
            # None placeholders so the returned shape stays unchanged.
            'segments': {
                'id': None,
                'seek': None,
                'start': None,
                'end': None,
                'text': None,
                'tokens': None,
                'temperature': None,
                'avg_logprob': None,
                'compression_ratio': None,
                'no_speech_prob': None,
            },
        }